In [None]:
# Since our ML was so good at clustering players according to their metrics, I decided to use a little time
# to make a simulator. 

# This simulator will pick one random player from each cluster and pitch them against one another in a
# randomized golf course. 

# Due to time constraints, this was built with the help of ChatGPT.

In [81]:
import random
import numpy as np
import pandas as pd
import os

path = os.getcwd()

df = pd.read_csv(path + "/data/pga_ready_for_eda_pca.csv")


def generate_golf_course():
    # Initialize an empty list for the course
    course = []
    
    # Define the number of each type of hole
    num_par3 = 4
    num_par5 = 4
    num_par4 = 18 - (num_par3 + num_par5)
    
    # Add the par values to the course list
    course.extend([3] * num_par3)
    course.extend([5] * num_par5)
    course.extend([4] * num_par4)
    
    # Shuffle the course to randomize the order of holes
    random.shuffle(course)
    
    return course

def select_random_players(df):
    unique_clusters = df['cluster'].unique()
    selected_players = []

    for cluster in unique_clusters:
        # Randomly sample one player from each cluster
        players_in_cluster = df[df['cluster'] == cluster]
        selected_player = players_in_cluster.sample(n=1)
        selected_players.append(selected_player)
    
    # Combine the selected players into a single DataFrame
    selected_players_df = pd.concat(selected_players).reset_index(drop=True)
    
    return selected_players_df

# Function to simulate playing a hole deterministically based on player's stats
def play_hole_deterministic(player, par):
    score = par
    
    # Adjust the score based on player's stats
    if par == 3:
        # Par 3 holes depend more on GIR and putting
        if player['gir'] < 65:  # Assuming 65% GIR is average
            score += 1  # Missed green likely leads to bogey
        if player['Average SG Putts'] > 0:
            score -= 1  # Good putter likely to one putt
    elif par == 4:
        # Par 4 holes depend on all aspects
        if player['SG:OTT'] < 0:
            score += 1  # Bad drive likely leads to bogey
        if player['SG:APR'] < 0 and player['gir'] < 65:
            score += 1  # Missed green on approach likely bogey
        if player['Average SG Putts'] > 0 and player['gir'] >= 65:
            score -= 1  # Good putter likely to one putt for birdie
    elif par == 5:
        # Par 5 holes depend more on distance and approach
        if player['Avg Distance'] > 290:  # Assuming 290 yards is average
            score -= 1  # Longer hitters can reach in two for birdie/eagle chance
        if player['SG:APR'] > 0 and player['Avg Distance'] > 290:
            score -= 1  # Good approach after a long drive can lead to eagle

    # Ensure that score is at least 1 (you cannot score 0 or negative on a hole)
    score = max(1, score)
    return score

# Function to simulate playing a hole with randomness based on player's stats
def play_hole_random(player, par):
    # Start with deterministic score
    score = play_hole_deterministic(player, par)
    
    # Add randomness - simulate variability and luck factor
    # This is a simplified model for randomness, which can be adjusted for more complexity
    luck_factor = np.random.normal(0, 0.5)  # Small gaussian noise
    score += luck_factor
    
    # Ensure that score is at least 1 (you cannot score 0 or negative on a hole)
    score = max(1, round(score))  # Round the score to the nearest whole number
    return score

def simulate_round(player, course, deterministic=True):
    # Simulate each hole and sum the scores to get the total score for the round
    if deterministic:
        total_score = sum(play_hole_deterministic(player, par) for par in course)
    else:
        total_score = sum(play_hole_random(player, par) for par in course)
    return total_score

# Example usage:
golf_course = generate_golf_course()
print("Golf Course Pars:", golf_course)

random_players = select_random_players(df)
random_players

round_scores = []

for index, player in random_players.iterrows():
    
    player_name = player['Player Name']
    player_cluster = player['cluster']  # Get the player's cluster
    deterministic_score = simulate_round(player, golf_course, deterministic=True)
    random_score = simulate_round(player, golf_course, deterministic=False)
    
    round_scores.append({
        'Player Name': player_name,
        'Cluster': player_cluster,  # Include the cluster information
        'Deterministic Score': deterministic_score,
        'Random Score': random_score
    })


# Convert the scores into a DataFrame for display
round_scores_df = pd.DataFrame(round_scores)
round_scores_df.sort_values(by="Cluster")

Golf Course Pars: [4, 4, 3, 5, 3, 5, 4, 4, 4, 4, 4, 5, 5, 4, 3, 4, 3, 4]


Unnamed: 0,Player Name,Cluster,Deterministic Score,Random Score
2,Rocco Mediate,1,82,82
0,Martin Piller,2,88,86
1,David Toms,3,68,65
3,Martin Laird,4,50,50


In [54]:
df = pd.read_csv(path + "/data/")

Unnamed: 0,Player Name,Rounds,Fairway Percentage,Avg Distance,gir,Average Putts,Average Scrambling,Average Score,Points,Wins,Top 10,Average SG Putts,Average SG Total,SG:OTT,SG:APR,SG:ARG,Money,No Seasons,cluster
0,Aaron Baddeley,700.0,53.427778,293.677778,61.945556,28.295556,60.074444,71.008444,5103,2.0,21.0,0.473333,0.152444,-0.342667,-0.310333,0.333111,10419470,9,2
1,Aaron Watkins,46.0,63.030000,277.100000,62.250000,28.750000,59.530000,71.770000,126,0.0,1.0,0.402000,-0.589000,-0.568000,-0.707000,0.285000,217495,1,3
2,Aaron Wise,90.0,63.330000,302.900000,68.810000,29.150000,56.280000,70.247000,1086,1.0,3.0,0.238000,0.703000,0.410000,0.012000,0.042000,3486407,1,1
3,Abraham Ancer,149.0,64.530000,286.250000,64.150000,28.985000,59.680000,71.398000,736,0.0,4.0,-0.238500,-0.203500,0.150500,-0.133000,0.017500,1839506,2,3
4,Adam Hadwin,378.0,64.375000,289.600000,66.387500,28.847500,59.680000,70.768250,3207,1.0,13.0,0.319750,0.486250,0.075000,0.026250,0.043250,7392920,4,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
433,Zac Blair,376.0,67.557500,275.150000,64.765000,28.802500,63.617500,71.106500,1631,0.0,4.0,0.240000,0.020000,-0.216750,-0.188750,0.186000,3266994,4,3
434,Zach Johnson,803.0,68.653333,282.144444,66.458889,28.727778,60.882222,70.107444,10935,5.0,42.0,0.362222,0.896778,0.052778,0.322222,0.165222,27540362,9,3
435,Zack Miller,86.0,57.060000,299.500000,61.790000,29.450000,54.260000,71.890000,290,0.0,1.0,-0.483000,-1.051000,0.057000,-0.612000,-0.012000,427341,1,4
436,Zecheng Dou,53.0,54.760000,291.900000,63.720000,30.000000,51.560000,72.773000,21,0.0,0.0,-0.667000,-1.198000,-0.079000,-0.265000,-0.187000,90993,1,4
