In [2]:
%pip install "stable-baselines3[extra]>=2.0.0a4"

Note: you may need to restart the kernel to use updated packages.


In [3]:
# Function to check if a point is inside a polygon
def point_in_polygon(ptc, polygon):
    # Get the x and y coordinates of the point
    x, y = ptc

    # Initialize a flag to check if the point is inside the polygon
    inside = False

    # Take the first point in the polygon
    p1 = polygon[0]

    # Loop through each side of the polygon
    for i in range(1, len(polygon) + 1):
        # Take the next point in the polygon
        p2 = polygon[i % len(polygon)]

        # Check if the point is roughly at the same height as the current side
        if y > min(p1[1], p2[1]):

            # Check if the point is not too high above the side or too low below the side
            if y <= max(p1[1], p2[1]):

                # Check if the point is to the left of the rightmost point of the side
                if x <= max(p1[0], p2[0]):

                    # Calculate the x-coordinate where a line from the point intersects with the side
                    x_intersection = (y - p1[1]) * (p2[0] - p1[0]) / (p2[1] - p1[1]) + p1[0]

                    # Check if the point is on or to the left of the calculated x-coordinate
                    if p1[0] == p2[0] or x <= x_intersection:

                        # Flip the inside flag, indicating that the point is inside the polygon
                        inside = not inside

        # Move to the next side of the polygon
        p1 = p2

    # Return whether the point is inside the polygon or not
    return inside

In [4]:
import numpy as np
import math
import gymnasium as gym
from gymnasium import spaces

class GolfEnv(gym.Env):
  def __init__(self):
    #Define the size of the grid
    self.grid_width = 200
    self.grid_height = 300
    self.shot_counter = 0

    #Define the action space as a Box space
    self.action_space = spaces.Box(
        low=np.array([0, 0, 0]),  # Minimum values for direction, club-type and swing strength #######
        high=np.array([180, 12, 3]),  # Maximum values for direction, club-type and swing strength ########
        dtype=np.float32
    )

    #Define the observation space
    self.observation_space = spaces.Dict({
          'x':spaces.Discrete(self.grid_width),
          'y':spaces.Discrete(self.grid_height)
      })

    #ball position
    self.ball_position = {
      'x': 50,
      'y': 0,
    }
    #green position
    self.green_left_corner = [self.grid_width-20, self.grid_height-20]
    self.green_right_corner = [self.grid_width, self.grid_height]

    #creating a dictionary to convert club to distance
    self.club_distances = {
            0: 200,
            1: 180,
            2: 170,
            3: 160,
            4: 150,
            5: 145,
            6: 138,
            7: 127,
            8: 120,
            9: 110,
            10: 97,
            11: 85,
            12: 55,
        }

    self.swing_strength = { #######
            0: 0.25,
            1: 0.5,
            2: 0.75,
            3: 1,
         }


  def step(self, action):

    # Extract direction, club type, and swing strength from the action
    direction = action[0]
    club_type = int(action[1])
    swing_strength = int(action[2])
    print(swing_strength)

    # Use direction and scaled distance by swing strength to calculate the new position
    angle_rad = math.radians(direction)
    scaled_distance = self.club_distances[club_type] * self.swing_strength[swing_strength]  # Scale distance by swing strength ########
    delta_x = scaled_distance * math.cos(angle_rad)
    delta_y = scaled_distance * math.sin(angle_rad)

    # Calculate new position
    new_x = self.ball_position['x'] + delta_x
    new_y = self.ball_position['y'] + delta_y

    new_x = round(new_x)
    new_y = round(new_y)

    #sets the reward to 0
    reward = 0
    terminated=False
    truncated=False
    #checks if the ball position is in the observation space and if not it sets it to the closest edge
    if new_x>self.observation_space['x'].n:
      reward = reward-10
      new_x=self.observation_space['x'].n
    if new_x<0:
      reward = reward-10
      new_x=0
    if new_y>self.observation_space['y'].n:
      reward = reward-10
      new_y=self.observation_space['y'].n

    #sets the ball position to the new position
    self.ball_position['x']=new_x
    self.ball_position['y']=new_y

    #checks if the ball position is on the green
    if (self.green_left_corner[0]<=self.ball_position['x']>=self.green_right_corner[0]) and (self.green_left_corner[1]<=self.ball_position['y']>=self.green_right_corner[1]):
      reward=10
      terminated = True

    bunker = [(25, 53), (26, 42), (36, 34), (49, 33), (56, 44), (78, 95), (86, 138), (86, 45), (78, 147), (67, 142), (62, 148), (60, 176), (42, 186), (31, 176), (29, 168), (30, 150), (57, 180), (58, 111), (29, 60)]

    ptc = (self.ball_position['x'], self.ball_position['y'])

    if point_in_polygon(ptc, bunker):
      reward=10

    #adds a shot to the shot counter
    self.shot_counter+=1

    #limit to 10 shots per round
    if self.shot_counter>=10:
      truncated=True

    observation = {
        'x': self.ball_position['x'],
        'y': self.ball_position['y']
    }

    return observation, reward, terminated, truncated, {}

  def reset(self, seed=None, options=None):
    #sets ball position to the middle of the start of the hole
    self.ball_position = {
        'x': 50,
        'y': 0,
    }

    observation = {
        'x': self.ball_position['x'],
        'y': self.ball_position['y']
    }

    return observation, {}

In [5]:
from stable_baselines3.common.env_checker import check_env

In [6]:
env = GolfEnv()
check_env(env, warn=True)

0
0
2
2
0
2
1
0
2
2
2




In [7]:
golf_env = GolfEnv()

golf_env.ball_position = {'x': 40, 'y': 40}

# Define an action (example: direction=90, club_type=5, strnght=3)
action = np.array([90, 5, 3])

# Call the step method to simulate the shot
observation, reward, terminated, truncated, _ = golf_env.step(action)

# Check if the ball is in the bunker based on the reward
if reward == 10:
    print("The ball is in the bunker!")
else:
    print("The ball is not in the bunker.")

# Print the final ball position
print("Final Ball Position:", golf_env.ball_position)

3
The ball is not in the bunker.
Final Ball Position: {'x': 40, 'y': 185}


**Traning the Model**

In [27]:
env = GolfEnv()
check_env(env)

print(env.observation_space)
print(env.action_space.sample())

0
2
1
0
1
0
0
1
1
2
0
Dict('x': Discrete(200), 'y': Discrete(300))
[25.691448   3.733479   2.3286211]


In [19]:
#from stable_baselines3.common.env_checker import check_env
from stable_baselines3 import PPO, A2C, DQN, HerReplayBuffer, SAC 

from stable_baselines3.common.env_util import make_vec_env

In [20]:
vec_env = make_vec_env(GolfEnv, n_envs=1)

model = SAC("MultiInputPolicy", env, verbose=1)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [21]:
model.learn(total_timesteps=100)

1
1
1
0
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 0        |
| time/              |          |
|    episodes        | 4        |
|    fps             | 3575     |
|    time_elapsed    | 0        |
|    total_timesteps | 4        |
---------------------------------
2
1
0
2
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | -1.25    |
| time/              |          |
|    episodes        | 8        |
|    fps             | 7150     |
|    time_elapsed    | 0        |
|    total_timesteps | 8        |
---------------------------------
2
2
1
0
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | -1.67    |
| time/              |          |
|    episodes        | 12       |
|    fps             | 1017     |
|    time_elapsed    | 0        |
|    total_timesteps | 1

<stable_baselines3.sac.sac.SAC at 0x2ad2cfcff10>

***Testing The Model***

In [38]:
from collections import OrderedDict

x_data = np.array([55], dtype=np.int64)
y_data = np.array([56], dtype=np.int64)
# calling x_data, and y_data to start off the golf ball

obs = OrderedDict([('x', x_data), ('y', y_data)])
# setting the obs to a data type that the model can take, and learn from

action, _ = model.predict(obs, deterministic=True)
# calling the models' next steps with the inital starting point

In [41]:
vec_env.reset()
# reseting the vec_env before the 

obs, reward, done, info = vec_env.step(action)
# running the next step of the model
# if you want to see the next step, then call the vec_env.step(action) again
# should run until the ten steps, and then the model should stop 

print(obs)
# OrderedDict([('x', array([49], dtype=int64)), ('y', array([69], dtype=int64))])
# the arrays for the x, and y are the hits that the algorithm is taking 

1
OrderedDict([('x', array([49], dtype=int64)), ('y', array([69], dtype=int64))])
