<a href="https://colab.research.google.com/github/Aryan-2605/DGCS-AI-DecisionSystem/blob/master/DGCS-ClubRecommender.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import drive
import pandas as pd
from shapely.geometry import Point, Polygon
import ast


# **Cell 1: Imports and Data**

In [None]:
# Mount Google Drive so the project CSVs under MyDrive/DGCS are readable.
drive.mount('/content/drive')

# Read the three source tables in one pass: player profiles, hole polygons, simulated shots.
player_data, hole_data, player_simulation = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/content/drive/MyDrive/DGCS/player_data.csv',
        '/content/drive/MyDrive/DGCS/Hole_1.csv',
        '/content/drive/MyDrive/DGCS/hole_sim.csv',
    )
)

# Preview the first rows of each table.
for frame in (player_data, hole_data, player_simulation):
    display(frame.head())

Mounted at /content/drive


Unnamed: 0,player_id,Age,Gender,HCP,Driver,Driver_Dispersion,3-Wood,3-Wood_Dispersion,5-Wood,5-Wood_Dispersion,...,9-Iron,9-Iron_Dispersion,PW,PW_Dispersion,GW,GW_Dispersion,SW,SW_Dispersion,LW,LW_Dispersion
0,1,31,Male,15.0,242.0,26.2,220.9,17.8,,,...,115.9,8.3,114.0,3.9,113.6,6.7,99.7,4.5,,
1,2,39,Male,13.4,232.9,22.9,233.2,22.4,,,...,134.3,6.1,117.8,4.3,124.5,6.5,100.4,4.8,,
2,3,37,Male,29.0,161.6,30.1,127.6,22.5,,,...,100.2,9.2,90.5,8.0,86.0,6.2,71.3,8.5,,
3,4,21,Female,6.4,217.8,20.8,,,184.6,13.7,...,,,108.4,5.3,102.6,5.9,94.8,6.5,,
4,5,42,Male,29.2,150.1,26.0,,,136.9,22.9,...,86.4,10.0,102.2,8.5,,,71.1,7.8,,


Unnamed: 0,Area,Coordinates
0,Fairway,"[(51.60492078656368, -0.2198113604936469), (51..."
1,TreeLine,"[(51.60430132835705, -0.22023673406949), (51.6..."
2,TreeLine,"[(51.60398427667845, -0.2201013746057723), (51..."
3,TreeLine,"[(51.60436777154526, -0.2199946487339721), (51..."
4,TreeLine,"[(51.60407076234819, -0.2191739058443709), (51..."


Unnamed: 0,round_id,player_id,hole_id,shot_id,start_coords,end_coords,club,bearing,rating
0,1,1,1,1,"(51.60576426300037, -0.22007174187974488)","(51.6044357422908, -0.2196944790609735)",Driver,286.973786,0.281199
1,1,1,1,2,"(51.6044357422908, -0.2196944790609735)","(51.603432024998135, -0.21899429264472972)",8-Iron,288.025731,0.686447
2,1,1,1,3,"(51.603432024998135, -0.21899429264472972)","(51.60301213993455, -0.21925742493499673)",SW,238.214285,0.998526
3,2,1,1,1,"(51.60576426300037, -0.22007174187974488)","(51.60390037216681, -0.2195181699551096)",Driver,286.973786,0.981531
4,2,1,1,2,"(51.60390037216681, -0.2195181699551096)","(51.60335422443254, -0.21916248136943686)",SW,287.900127,0.814996


# **Cell 2: Preprocessing Datasets**

In [None]:
import numpy as np
import pandas as pd
from ast import literal_eval
from geopy.distance import geodesic
from shapely.geometry import Point, Polygon
import ast
from shapely.geometry.linestring import LineString

#Area Functions:

class Area():
    """Point-in-polygon lookup for the mapped areas of one hole.

    ``hole_data`` is a DataFrame with an ``Area`` column (area label) and a
    ``Coordinates`` column holding the string form of a list of coordinate
    pairs. Each row becomes one shapely ``Polygon``; polygons are grouped by
    area label in ``self.polygons``.
    """

    # Area labels looked up in the ``Area`` column of the hole data.
    PREDEFINED_AREAS = ('Fairway', 'TreeLine', 'Green', 'Bunker', 'Zone', 'TeeBox')

    # (polygon key, label returned to callers), highest priority first.
    # A point lying in several overlapping areas is reported as the first match.
    ZONE_PRIORITY = (
        ('TeeBox', 'TeeBox'),
        ('Bunker', 'Bunker'),
        ('Green', 'Green'),
        ('Fairway', 'Fairway'),
        ('TreeLine', 'Treeline'),
        ('Zone', 'Rough'),
    )

    def __init__(self, hole_data):
        """Store the hole table and build the polygons for every known area."""
        self.hole_data = hole_data
        self.polygons = self.create_polygon()

    def parse_hole_data(self):
        """Return ``{area label: [coordinate list, ...]}`` for every predefined area.

        Areas without rows in ``hole_data`` map to an empty list.
        """
        area_coordinates = {}

        for area in self.PREDEFINED_AREAS:
            raw_values = self.hole_data.loc[self.hole_data['Area'] == area, 'Coordinates'].values
            # Coordinates are stored as text; literal_eval turns them back into lists of tuples.
            area_coordinates[area] = [ast.literal_eval(item) for item in raw_values]

        return area_coordinates

    def create_polygon(self):
        """Return ``{area label: [Polygon, ...]}``; areas with no rows are omitted."""
        hole_polygons = {}

        for zone, coordinates in self.parse_hole_data().items():
            for coords in coordinates:
                hole_polygons.setdefault(zone, []).append(Polygon(coords))

        return hole_polygons

    def return_location(self, location):
        """Classify ``location`` (a shapely ``Point``) into a zone label.

        The point must use the same axis order as the polygon vertices.

        Returns one of ``"TeeBox"``, ``"Bunker"``, ``"Green"``, ``"Fairway"``,
        ``"Treeline"``, ``"Rough"`` (inside a ``Zone`` polygon) or ``"Invalid"``
        when no polygon contains the point.
        """
        is_inside = {}

        for zone, polygons in self.polygons.items():
            if zone not in self.PREDEFINED_AREAS:
                continue

            is_inside[zone] = False
            for i, polygon in enumerate(polygons):
                # Tolerate polygons stored as their string form: convert once and cache.
                if isinstance(polygon, str):
                    polygon = Polygon(ast.literal_eval(polygon))
                    self.polygons[zone][i] = polygon

                if polygon.contains(location):
                    is_inside[zone] = True
                    break

        # A zone with no polygons on this hole is absent from ``is_inside`` and must
        # count as "not inside"; defaulting to True would report it for every point.
        for zone, label in self.ZONE_PRIORITY:
            if is_inside.get(zone, False):
                return label

        return "Invalid"





# Build the zone lookup for this hole.
# NOTE: this rebinds the name ``Area`` from the class to an instance; later cells call
# ``Area.return_location(...)`` on it. The constructor already builds the polygons, so
# no separate create_polygon() call is needed.
Area = Area(hole_data)

# Merge Table 1 (player_simulation) & Table 2 (player_data) on Player ID
df = player_simulation.merge(player_data, on="player_id", how="left")


def _parse_coords(value):
    """Return a coordinate pair, parsing it first when it is stored as a string."""
    return literal_eval(value) if isinstance(value, str) else value


# Split the "(a, b)" coordinate pairs into separate numeric columns.
df["start_x"], df["start_y"] = zip(*df["start_coords"].apply(_parse_coords))
df["end_x"], df["end_y"] = zip(*df["end_coords"].apply(_parse_coords))

R = 6371 * 1093.61  # Earth's radius in yards


def haversine_yards(lat1, lon1, lat2, lon2, radius=R):
    """Great-circle distance between two points given in degrees.

    Accepts scalars or aligned Series/arrays; the result is in the unit of
    ``radius`` (yards by default).
    """
    half_dlat = np.radians(lat2 - lat1) / 2
    half_dlon = np.radians(lon2 - lon1) / 2
    a = (
        np.sin(half_dlat) ** 2 +
        np.cos(np.radians(lat1)) * np.cos(np.radians(lat2)) *
        np.sin(half_dlon) ** 2
    )
    return 2 * radius * np.arcsin(np.sqrt(a))


# The *_x columns hold the first element of each pair and are used as latitude here.
df["distance_covered"] = haversine_yards(df["start_x"], df["start_y"], df["end_x"], df["end_y"])

df["Gender"] = df["Gender"].map({"Male": 0, "Female": 1})  # Convert Gender to binary

# Encode clubs as integer category codes and keep the code -> name lookup for later decoding.
club_categories = df["club"].astype("category")
club_mapping = dict(enumerate(club_categories.cat.categories))
df["club"] = club_categories.cat.codes

# Label the zone each shot starts and ends in.
df["start_zone"] = [
    Area.return_location(Point(x, y)) for x, y in zip(df["start_x"], df["start_y"])
]
df["end_zone"] = [
    Area.return_location(Point(x, y)) for x, y in zip(df["end_x"], df["end_y"])
]

# One shared integer encoding for start and end zones (labels sorted as categories).
all_zones = pd.concat([df["start_zone"], df["end_zone"]]).astype("category").cat.categories
zone_mapping = {zone: i for i, zone in enumerate(all_zones)}

df["start_zone"] = df["start_zone"].map(zone_mapping)
df["end_zone"] = df["end_zone"].map(zone_mapping)

# The raw coordinate strings are no longer needed once split into numeric columns.
df = df.drop(columns=["start_coords", "end_coords"])
display(df)

# Print the club mapping
print("Club Mapping:", club_mapping)

print("Zone Mapping:", zone_mapping)

Unnamed: 0,round_id,player_id,hole_id,shot_id,club,bearing,rating,Age,Gender,HCP,...,SW_Dispersion,LW,LW_Dispersion,start_x,start_y,end_x,end_y,distance_covered,start_zone,end_zone
0,1,1,1,1,11,286.973786,0.281199,31,0,15.0,...,4.5,,,51.605764,-0.220072,51.604436,-0.219694,164.046672,4,1
1,1,1,1,2,9,288.025731,0.686447,31,0,15.0,...,4.5,,,51.604436,-0.219694,51.603432,-0.218994,133.019885,1,3
2,1,1,1,3,15,238.214285,0.998526,31,0,15.0,...,4.5,,,51.603432,-0.218994,51.603012,-0.219257,54.791103,3,2
3,2,1,1,1,11,286.973786,0.981531,31,0,15.0,...,4.5,,,51.605764,-0.220072,51.603900,-0.219518,230.480159,4,1
4,2,1,1,2,15,287.900127,0.814996,31,0,15.0,...,4.5,,,51.603900,-0.219518,51.603354,-0.219162,71.641295,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
32601,8028,4000,1,2,9,291.028682,0.124904,29,0,29.7,...,7.0,,,51.604899,-0.219955,51.604631,-0.219767,35.545997,3,1
32602,8028,4000,1,3,1,288.325027,0.076376,29,0,29.7,...,7.0,,,51.604631,-0.219767,51.604412,-0.219643,28.264033,1,1
32603,8028,4000,1,4,1,286.345975,0.104372,29,0,29.7,...,7.0,,,51.604412,-0.219643,51.604080,-0.219550,40.961366,1,1
32604,8028,4000,1,5,9,286.587228,0.561953,29,0,29.7,...,7.0,,,51.604080,-0.219550,51.603364,-0.219330,88.672418,1,3


Club Mapping: {0: '3-Hybrid', 1: '3-Wood', 2: '4-Hybrid', 3: '4-Iron', 4: '5-Hybrid', 5: '5-Iron', 6: '5-Wood', 7: '6-Iron', 8: '7-Iron', 9: '8-Iron', 10: '9-Iron', 11: 'Driver', 12: 'GW', 13: 'LW', 14: 'PW', 15: 'SW'}
Zone Mapping: {'Bunker': 0, 'Fairway': 1, 'Green': 2, 'Rough': 3, 'TeeBox': 4, 'Treeline': 5}


# **Cell 3: Data Splitting and Target View**

In [None]:
from sklearn.model_selection import train_test_split

# Target: the integer-encoded club used for each shot.
y = df["club"]

# Features: every remaining column except the target and the round/hole/bearing fields.
# NOTE(review): player_id stays in X as a feature — confirm that is intended.
non_feature_columns = ["club", "hole_id", "bearing", "round_id"]
X = df.drop(columns=non_feature_columns)

# 80/20 row-level split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
display(y)


Unnamed: 0,club
0,11
1,9
2,15
3,11
4,15
...,...
32601,9
32602,1
32603,1
32604,9


# **Cell 4: Train a model**
## **Model - Random Forest Classifier**

---



In [None]:
from sklearn.ensemble import RandomForestClassifier
# Recorded best parameters: {'n_estimators': 400, 'min_samples_split': 5, 'min_samples_leaf': 1, 'max_features': 'sqrt', 'max_depth': 30}
# NOTE(review): the values fitted below differ from that record
# (300 trees, min_samples_split=2, no max_depth) — confirm which set is intended.
rf_params = dict(
    n_estimators=300,
    min_samples_split=2,
    min_samples_leaf=1,
    max_features='sqrt',
    random_state=42,
)

rf = RandomForestClassifier(**rf_params)
rf.fit(X_train, y_train)

# Test-set predictions are kept in ``y_pred`` for the classification report cell.
y_pred = rf.predict(X_test)

# Mean accuracy on the test split, shown as the cell output.
rf.score(X_test, y_test)

0.7956148420729837

# **Cell 5: Train another Model**
## **Model -  XGBoost**

In [None]:
import time
import numpy as np
import xgboost as xgb
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split


#📌 Best Parameters: {'n_estimators': 500, 'learning_rate': 0.05, 'max_depth': 10, 'subsample': 0.8, 'colsample_bytree': 0.8, 'random_state': 42, 'use_label_encoder': False, 'eval_metric': 'logloss'}


# XGBoost classifier for the integer-encoded club labels.
# ``use_label_encoder`` is deliberately not passed: the installed XGBoost reports it as
# an unused parameter and emits a warning on every fit.
xgb_model = xgb.XGBClassifier(
    n_estimators=500,        # Number of trees
    learning_rate=0.05,      # Lower for better accuracy
    max_depth=10,            # Depth of trees (tuning needed)
    subsample=0.8,           # % of data used per tree
    colsample_bytree=0.8,    # % of features used per tree
    random_state=42,
    eval_metric="logloss"
)


print("\n[INFO] Training XGBoost Model...")
start_time = time.time()

xgb_model.fit(X_train, y_train)

end_time = time.time()
elapsed_time = end_time - start_time
print(f"[INFO] Training completed in {elapsed_time:.2f} seconds")

# Test-set predictions are kept in ``y_pred2`` for the classification report cell.
y_pred2 = xgb_model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred2)
print(f"\n✅ XGBoost Accuracy: {accuracy:.4f}")

print("\n🔍 Classification Report:\n", classification_report(y_test, y_pred2))



[INFO] Training XGBoost Model...


Parameters: { "use_label_encoder" } are not used.



[INFO] Training completed in 110.22 seconds

✅ XGBoost Accuracy: 0.8617

🔍 Classification Report:
               precision    recall  f1-score   support

           0       0.85      0.88      0.87       135
           1       0.83      0.81      0.82       848
           2       0.94      0.80      0.87        81
           3       0.88      0.83      0.85       120
           4       0.90      0.88      0.89       333
           5       0.79      0.86      0.82       188
           6       0.71      0.63      0.67       172
           7       0.84      0.88      0.86       481
           8       0.76      0.76      0.76       299
           9       0.65      0.49      0.56       201
          10       0.51      0.49      0.50       134
          11       0.88      0.92      0.90      1251
          12       0.83      0.71      0.76       119
          13       0.95      0.98      0.97       802
          14       0.72      0.68      0.70       161
          15       0.94      0.95   

# **Cell 6: Hyperparameter Tuning**
This cell searches for the best hyperparameters for the XGBoost model. The search is disabled by default because it is slow to run.

In [None]:
import itertools
import time

import xgboost as xgb
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split

# Set to True to run the search. It fits one model per grid combination, so it is slow.
RUN_XGB_TUNING = False


def tune_xgboost(X, y, param_grid, fixed_params, validation_size=0.2, random_state=42):
    """Exhaustive grid search for an ``XGBClassifier`` scored on a validation split.

    Candidates are compared on a validation subset carved out of ``X``/``y`` so the
    test set is never used for model selection. The function uses only local names,
    so it does not overwrite the notebook's ``xgb_model`` or ``y_pred``.

    Parameters:
        X, y: training features and labels.
        param_grid: ``{parameter name: [candidate values]}``; every combination is tried.
        fixed_params: parameters shared by all candidates.
        validation_size: fraction of ``X`` held out for scoring candidates.
        random_state: seed for the validation split.

    Returns:
        ``(best_model, best_params, best_score)``; the first two are ``None`` when the
        grid is empty. ``best_model`` is fitted on the non-validation part of ``X`` only.
    """
    X_fit, X_val, y_fit, y_val = train_test_split(
        X, y, test_size=validation_size, random_state=random_state, stratify=y
    )

    names = list(param_grid)
    best_model, best_params, best_score = None, None, -1.0

    for values in itertools.product(*(param_grid[name] for name in names)):
        searched = dict(zip(names, values))
        params = {**fixed_params, **searched}

        candidate = xgb.XGBClassifier(**params)
        candidate.fit(X_fit, y_fit)
        score = accuracy_score(y_val, candidate.predict(X_val))

        tried = ", ".join(f"{name}={value}" for name, value in searched.items())
        print(f"🔍 Tried: {tried} → Validation accuracy: {score:.4f}")

        if score > best_score:
            best_model, best_params, best_score = candidate, params, score

    return best_model, best_params, best_score


if RUN_XGB_TUNING:
    # Don't add too many values: the number of fits is the product of all list lengths.
    # Further knobs (subsample, colsample_bytree, gamma, reg_alpha, reg_lambda) can be
    # moved from ``fixed_params`` into this grid when the extra run time is acceptable.
    param_grid = {
        'n_estimators': [100, 300, 500],
        'learning_rate': [0.01, 0.05, 0.1],
        'max_depth': [3, 6, 10],
    }
    fixed_params = {
        'subsample': 0.8,
        'colsample_bytree': 0.8,
        'random_state': 42,
        'eval_metric': 'logloss',
    }

    print("\n[INFO] Starting XGBoost hyperparameter tuning...\n")
    tuning_start = time.time()

    best_model, best_params, best_score = tune_xgboost(
        X_train, y_train, param_grid, fixed_params
    )

    tuning_elapsed = time.time() - tuning_start
    print(f"\n🏆 Best Model Found! Validation accuracy: {best_score:.4f}")
    print(f"📌 Best Parameters: {best_params}")
    print(f"⏳ Time Taken: {tuning_elapsed:.2f} seconds (~{tuning_elapsed / 60:.2f} minutes)")

    # The test set is touched once, only to report on the selected model.
    y_pred_best = best_model.predict(X_test)
    print("\n🔍 Classification Report:\n", classification_report(y_test, y_pred_best))

'\n\nimport time\nimport xgboost as xgb\nfrom sklearn.metrics import accuracy_score, classification_report\nfrom sklearn.model_selection import train_test_split\n"""\nEmoji\'s to Copy Paste:\n\n😻 😼 😽 🙀 😿 😾 🙈 🙉 🙊 💋 💘 💝 💖 💗 💓 💞 💕 💌 ❣️ ❤️ 💙 💚 💛\n🧡 💜 🖤 🤍 🤎 ❤️\u200d🔥 ❤️\u200d🩹 💯 💢 💥 💫 💦 💨 🕳️ 💣 🔥 🩸 💎 🔪 🏹 🛡️ 🚀\n💊 💉 🏆 🎗️ 🎯 🎮 🕹️ 🎰 🎲 🎳 🧩 ♟️ 🃏 🀄 🎭 🎨 🖌️ 🎼 🎤 🎧 🎷 🎸 🎹\n🎺 🎻 🥁 🪗 🎬 🎪 🎭 🎨 🎫 🎟️ 🎖️ 🏆 🏅 🎗️ 🎯 🏹 🎮 🎰 🎲 🎳 🧩 ♟️ 🃏 🀄\n🎭 ⚽ 🏀 🏈 ⚾ 🥎 🎾 🏐 🏉 🏏 🏑 🏒 🥍 🏓 🏸 🎿 ⛷️ 🏂 🪂 🤺 🏌️ ⛳ 🏄\n🏊 🤽 🤾 🚴 🚵 🏇 🏆 🏅 🎗️ 🎯 🚗 🚕 🚙 🚌 🚎 🏎️ 🚓 🚑 🚒 🚐 🛻 🚚 🚜\n🚲 🛴 🛵 🏍️ 🛺 🚂 🚆 🚇 🚈 🚊 🚉 ✈️ 🛫 🛬 🚀 🛸 🚁 🚤 ⏰ ⏳ ⌛ 🕰️ 🌍\n🌎 🌏 🪐 🌕 🌖 🌗 🌘 🌑 🌒 🌓 🌔 🌚 🌝 🌞 ⭐ 🌟 ✨ 💫 🌠 ☁️ ⛅ 🌤️ 🌥️\n🌦️ 🌧️ ⛈️ 🔥 🌊 💧 💦 ☔ ❄️ 🌬️ 💨 🌪️ 🌈 🌂 ☂️ 🧥 🧢 👓 🕶️ 🎩 🎓 👑 👒\n🎭 🦸 🦹 🧚 🧛 🧜 🧝 🧙 🧞 🧟 🏆 📌 ⏳ 🔍 ✅\n\n"""\n\n# Don\'t add to many or it will be slow asf\nparam_grid = {\n    \'n_estimators\': [100, 300, 500],\n    \'learning_rate\': [0.01, 0.05, 0.1],\n    \'max_depth\': [3, 6, 10],\n    \'subsample\': [0.6, 0.8, 1.0],\n    \'colsample_bytree\': [0.6, 0.8, 1.0],\n    \'gamma\': [0, 0.1, 0.2],\n    \'reg

# **Cell 7: Analytics on the models' feature importances and classification reports**

In [None]:
from sklearn.metrics import classification_report
from IPython.core.display import display, HTML
import pandas as pd

# Per-class metrics for both models as dicts, so they can be tabulated.
# ``y_pred`` holds the random forest predictions, ``y_pred2`` the XGBoost ones.
RFT_report = classification_report(y_test, y_pred, output_dict=True)
XGBOOST_report = classification_report(y_test, y_pred2, output_dict=True)

# One row per class / aggregate, one column per metric.
RFT_df = pd.DataFrame(RFT_report).T
XGBOOST_df = pd.DataFrame(XGBOOST_report).T

# Feature importances indexed by feature name.
feature_names = X_train.columns
rf_features = pd.DataFrame(
    {'Random Forest Importance': rf.feature_importances_}, index=feature_names
)
xgb_features = pd.DataFrame(
    {'XGBoost Importance': xgb_model.feature_importances_}, index=feature_names
)

# HTML fragments: one <div> per table so the two models can sit side by side.
class_report_html = f"""
<div>
    <h3>Random Forest Classification Report</h3>
    {RFT_df.to_html()}</div>
<div>
    <h3>XGBoost Classification Report</h3>
    {XGBOOST_df.to_html()}

</div>
"""

feature_importance_html = f"""
<div>
    <h3>Random Forest Feature Importances</h3>
    {rf_features.to_html()}
</div>
<div>
    <h3>XGBoost Feature Importances</h3>
    {xgb_features.to_html()}
</div>
"""

# Display each pair of tables in a flex row.
for section_html in (class_report_html, feature_importance_html):
    display(HTML(f"<div style='display: flex; gap: 20px;'>{section_html}</div>"))


Unnamed: 0,precision,recall,f1-score,support
0,0.80292,0.814815,0.808824,135.0
1,0.709491,0.722877,0.716121,848.0
2,0.820896,0.679012,0.743243,81.0
3,0.813725,0.691667,0.747748,120.0
4,0.797101,0.825826,0.811209,333.0
5,0.713568,0.755319,0.73385,188.0
6,0.618644,0.424419,0.503448,172.0
7,0.749042,0.81289,0.779661,481.0
8,0.671587,0.608696,0.638596,299.0
9,0.652632,0.308458,0.418919,201.0

Unnamed: 0,precision,recall,f1-score,support
0,0.85,0.881481,0.865455,135.0
1,0.830303,0.807783,0.818888,848.0
2,0.942029,0.802469,0.866667,81.0
3,0.877193,0.833333,0.854701,120.0
4,0.904615,0.882883,0.893617,333.0
5,0.786408,0.861702,0.822335,188.0
6,0.710526,0.627907,0.666667,172.0
7,0.844,0.877339,0.860347,481.0
8,0.76,0.762542,0.761269,299.0
9,0.649007,0.487562,0.556818,201.0


Unnamed: 0,Random Forest Importance
player_id,0.014343
shot_id,0.043727
rating,0.024573
Age,0.012649
Gender,0.002419
HCP,0.019869
Driver,0.017611
Driver_Dispersion,0.014154
3-Wood,0.027136
3-Wood_Dispersion,0.023115

Unnamed: 0,XGBoost Importance
player_id,0.005455
shot_id,0.067396
rating,0.006937
Age,0.005474
Gender,0.008012
HCP,0.017755
Driver,0.012725
Driver_Dispersion,0.005221
3-Wood,0.027753
3-Wood_Dispersion,0.010298


# **Cell 8: Retrieve a user and tweak attributes for prediction**

In [None]:
# Inputs: change only the values in this group for a new user, shot and location.
start_coords = (51.60578319539357, -0.22006148192654287)
end_coords = (51.60404968261719,-0.21954300999641418)
# Positional slice, not a player_id lookup: iloc[100:101] selects the row at position 100
# (player_id 101 in the displayed data) and keeps it as a one-row DataFrame.
new_user =  player_data.iloc[100:101].copy()
shot_number = 1


def _zone_code(zone, which):
    """Return the integer code for ``zone``; fail clearly when it has no code."""
    if zone not in zone_mapping:
        raise ValueError(
            f"{which} location falls in zone {zone!r}, which has no code in "
            f"zone_mapping {sorted(zone_mapping)}; pick coordinates inside the mapped hole."
        )
    return zone_mapping[zone]


# Zone labels for both ends of the shot, using the same point construction as preprocessing.
start_zone = Area.return_location(Point(start_coords[0], start_coords[1]))
end_zone = Area.return_location(Point(end_coords[0], end_coords[1]))

# Haversine distance between the two coordinates, in yards (same formula as preprocessing).
R = 6371 * 1093.61  # Earth's radius in yards
distance_covered = 2 * R * np.arcsin(
    np.sqrt(
        np.sin(np.radians(end_coords[0] - start_coords[0]) / 2) ** 2 +
        np.cos(np.radians(start_coords[0])) * np.cos(np.radians(end_coords[0])) *
        np.sin(np.radians(end_coords[1] - start_coords[1]) / 2) ** 2
    )
)

# Add the shot-level features the models were trained on to the player's profile row.
new_user = new_user.assign(
    Gender=new_user['Gender'].map({'Male': 0, 'Female': 1}),
    rating=1,
    start_x=start_coords[0],
    start_y=start_coords[1],
    end_x=end_coords[0],
    end_y=end_coords[1],
    start_zone=_zone_code(start_zone, "start"),
    end_zone=_zone_code(end_zone, "end"),
    shot_id=shot_number,
    distance_covered=distance_covered,
)

new_user

Unnamed: 0,player_id,Age,Gender,HCP,Driver,Driver_Dispersion,3-Wood,3-Wood_Dispersion,5-Wood,5-Wood_Dispersion,...,LW_Dispersion,rating,start_x,start_y,end_x,end_y,start_zone,end_zone,shot_id,distance_covered
100,101,39,1,6.9,217.3,21.9,194.5,19.4,,,...,8.0,1,51.605783,-0.220061,51.60405,-0.219543,4,1,1,214.407987


# **Cell 9: Generate Recommendation**

In [None]:
# Keep exactly the training features, in training order, so both models accept the row.
new_user = new_user[X_train.columns]

# Random forest: predict the club code for the single row and decode it to a club name.
predicted_club_code = rf.predict(new_user)[0]
predicted_club = club_mapping[predicted_club_code]
print("Random Forest's Prediction:")
print(f"Recommended Club: {predicted_club} \n")

# XGBoost: same row, same decoding.
predicted_club_code = xgb_model.predict(new_user)[0]
predicted_club = club_mapping[predicted_club_code]
print("XGBoost's Prediction:")
print(f"Recommended Club: {predicted_club}")

# Show this player's recorded value in the column named after the club XGBoost recommended.
print(new_user[predicted_club])

Random Forest's Prediction:
Recommended Club: Driver 

XRB's Prediction:
Recommended Club: Driver
100    217.3
Name: Driver, dtype: float64
