In [4]:
from google.colab import drive
import joblib
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import plotly.graph_objects as go

In [5]:
# Mount Google Drive (if the model is saved there)
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [6]:
csv_path = "/content/drive/My Drive/FIT/Work_Space_Shot_Quality_Metric/Shot_Visuals_SpencerJohnson_GustafStrom.csv"
point_df_eda = pd.read_csv(csv_path)
player_1 = "Spencer Johnson"
player_2 = "Gustaf Strom"
player_1_dom_hand = "left"
player_2_dom_hand = "right"

In [7]:
# isError - Jimmy
def isError(df):
  error_columns = ['isErrorWideR', 'isErrorWideL', 'isErrorNet', 'isErrorLong']
  df_error = df[error_columns]

  # Create a new column "isError" with initial empty values
  df['isError'] = None

  # For each row in filtered_df, if any column has value 1, set the value of "isError" column to 1
  df.loc[df_error.any(axis=1), 'isError'] = 1

isError(point_df_eda)
print(point_df_eda['isError'])

0      None
1         1
2      None
3         1
4      None
       ... 
394    None
395       1
396    None
397    None
398    None
Name: isError, Length: 399, dtype: object


In [8]:
# Number of shots hit by a player in the match - Jimmy
def total_shots(df, player):
    df_as_server = df[df["serverName"] == player]
    df_as_returner = df[df["serverName"] != player]

    total_shots = 0
    for i in range(len(df_as_server)):
      if (df_as_server.iloc[i]['shotInRally'] % 2 == 1):
        total_shots += 1
    for i in range(len(df_as_returner)):
      if (df_as_returner.iloc[i]['shotInRally'] % 2 == 0):
        total_shots += 1

    return total_shots

print(f"Total shots by {player_1} is {total_shots(df=point_df_eda, player=player_1)}.")

Total shots by Spencer Johnson is 199.


In [9]:
def total_points(df):
  return df['pointNumber'].max()

print(f"Total points of the match is {total_points(point_df_eda)}.")

Total points of the match is 82.


In [10]:
def total_points_won(df, player):
    df_as_server = df[df["serverName"] == player]
    df_as_returner = df[df["serverName"] != player]

    win_as_server_count = 0
    win_as_returner_count = 0

    for i in range(0, df_as_server.shape[0]):
        if (df_as_server.iloc[i]['isPointEnd'] == 1):
            if (df_as_server.iloc[i]["shotInRally"] % 2 == 1 and
                (df_as_server.iloc[i]['isWinner'] == 1 or
                 df_as_server.iloc[i]['isAce'] == 1)):
                win_as_server_count += 1
            if (df_as_server.iloc[i]["shotInRally"] % 2 == 0 and
                df_as_server.iloc[i]['isError'] == 1):
                win_as_server_count += 1

    for j in range(0, df_as_returner.shape[0]):
        if df_as_returner.iloc[j]['isPointEnd'] == 1:
            if (df_as_returner.iloc[j]["shotInRally"] % 2 == 0 and
                df_as_returner.iloc[j]['isWinner'] == 1):
                win_as_returner_count += 1
            if (df_as_returner.iloc[j]["shotInRally"] % 2 == 1 and
                df_as_returner.iloc[j]['isError'] == 1):
                win_as_returner_count += 1

    total = win_as_server_count + win_as_returner_count
    return total

print(f"Total points won by {player_1} is : {total_points_won(point_df_eda, player_1)}.")

Total points won by Spencer Johnson is : 54.


In [11]:
def shotType_direct_win_per(df, player, shot_type):
    sType = shot_type.lower()

    # Filter for the player's data
    df_as_server = df[df["serverName"] == player]
    df_as_returner = df[df["serverName"] != player]

    # Vectorized filtering for winners
    df_as_server_win = df_as_server[
        (df_as_server['shotInRally'] % 2 == 1) &
        (df_as_server['shotInRally'] != 1) &
        (df_as_server['shotFhBh'].str.lower() == sType) &
        (df_as_server['isVolley'] != 1) &
        (df_as_server['isApproach'] != 1) &
        (df_as_server['isOverhead'] != 1) &
        (df_as_server['isWinner'] == 1)
    ]
    df_as_returner_win = df_as_returner[
        (df_as_returner['shotInRally'] % 2 == 1) &
        (df_as_returner['shotFhBh'].str.lower() == sType) &
        (df_as_returner['isVolley'] != 1) &
        (df_as_returner['isApproach'] != 1) &
        (df_as_returner['isOverhead'] != 1) &
        (df_as_returner['isWinner'] == 1)
    ]

    # Calculate total winning points
    winning_points = df_as_server_win.shape[0] + df_as_returner_win.shape[0]

    # Calculate percentage
    total_pts = total_points(df)
    if total_pts == 0:
        return 0
    return round(100 * winning_points / total_pts, 2)

# Example usage
print(f"Forehand winner percentage of {player_1} is : {shotType_direct_win_per(point_df_eda, player_1, 'forehand')}%.")
print(f"Backhand winner percentage of {player_1} is : {shotType_direct_win_per(point_df_eda, player_1, 'backhand')}%.")

Forehand winner percentage of Spencer Johnson is : 6.1%.
Backhand winner percentage of Spencer Johnson is : 4.88%.


In [12]:
# Volley points number - Jimmy
def volley_point_num(df, player):
    df_as_server = df[df["serverName"] == player]
    df_as_returner = df[df["serverName"] != player]

    total_volley_point = 0
    for i in range(len(df_as_server)):
      if df_as_server.iloc[i]['isPointStart'] == 1:
        j = i
        while j <= len(df_as_server):
          if df_as_server.iloc[j]['isPointEnd'] == 1:
            break
          j += 1
        for k in range(i, j+1):
          if (df_as_server.iloc[k]['isVolley'] == 1) and (df_as_server.iloc[k]['shotInRally'] % 2 == 1):
            total_volley_point += 1
            break
    for i in range(len(df_as_returner)):
      if df_as_returner.iloc[i]['isPointStart'] == 1:
        j = i
        while j <= len(df_as_returner):
          if df_as_returner.iloc[j]['isPointEnd'] == 1:
            break
          j += 1
        for k in range(i, j+1):
          if (df_as_returner.iloc[k]['isVolley'] == 1) and (df_as_returner.iloc[k]['shotInRally'] % 2 == 0):
            total_volley_point += 1
            break

    return total_volley_point

print(f"The number of volley points by {player_1} is : {volley_point_num(point_df_eda, player_1)}.")

The number of volley points by Spencer Johnson is : 3.


In [13]:
# Approach points number - Jimmy
def approach_point_num(df, player):
    df_as_server = df[df["serverName"] == player]
    df_as_returner = df[df["serverName"] != player]

    total_approach_point = 0
    for i in range(len(df_as_server)):
      if df_as_server.iloc[i]['isPointStart'] == 1:
        j = i
        while j <= len(df_as_server):
          if df_as_server.iloc[j]['isPointEnd'] == 1:
            break
          j += 1
        for k in range(i, j+1):
          if (df_as_server.iloc[k]['isApproach'] == 1) and (df_as_server.iloc[k]['shotInRally'] % 2 == 1):
            total_approach_point += 1
            break
    for i in range(len(df_as_returner)):
      if df_as_returner.iloc[i]['isPointStart'] == 1:
        j = i
        while j <= len(df_as_returner):
          if df_as_returner.iloc[j]['isPointEnd'] == 1:
            break
          j += 1
        for k in range(i, j+1):
          if (df_as_returner.iloc[k]['isApproach'] == 1) and (df_as_returner.iloc[k]['shotInRally'] % 2 == 0):
            total_approach_point += 1
            break

    return total_approach_point

print(f"The number of volley points by {player_1} is : {approach_point_num(point_df_eda, player_1)}.")

The number of volley points by Spencer Johnson is : 4.


In [14]:
# Overhead points number - Jimmy
def overhead_point_num(df, player):
    df_as_server = df[df["serverName"] == player]
    df_as_returner = df[df["serverName"] != player]

    total_overhead_point = 0
    for i in range(len(df_as_server)):
      if df_as_server.iloc[i]['isPointStart'] == 1:
        j = i
        while j <= len(df_as_server):
          if df_as_server.iloc[j]['isPointEnd'] == 1:
            break
          j += 1
        for k in range(i, j+1):
          if (df_as_server.iloc[k]['isOverhead'] == 1) and (df_as_server.iloc[k]['shotInRally'] % 2 == 1):
            total_overhead_point += 1
            break
    for i in range(len(df_as_returner)):
      if df_as_returner.iloc[i]['isPointStart'] == 1:
        j = i
        while j <= len(df_as_returner):
          if df_as_returner.iloc[j]['isPointEnd'] == 1:
            break
          j += 1
        for k in range(i, j+1):
          if (df_as_returner.iloc[k]['isOverhead'] == 1) and (df_as_returner.iloc[k]['shotInRally'] % 2 == 0):
            total_overhead_point += 1
            break

    return total_overhead_point

print(f"The number of overhead points by {player_1} is : {overhead_point_num(point_df_eda, player_1)}.")

The number of overhead points by Spencer Johnson is : 0.


In [15]:
def total_net_points(df, player):
  return volley_point_num(df,player) + approach_point_num(df,player) + overhead_point_num(df,player)

print(f"The number of net points by {player_1} is : {total_net_points(point_df_eda, player_1)}.")

The number of net points by Spencer Johnson is : 7.


In [16]:
def net_per(df,player):
  return round(100 * total_net_points(df, player) / total_points(df), 2)

print(f"The percentage of net points by {player_1} is : {net_per(point_df_eda, player_1)}%.")

The percentage of net points by Spencer Johnson is : 8.54%.


In [17]:
def uniqueShotType_direct_win_num(df, player, unique_shot_type):
    col = "is" + (unique_shot_type.lower()).capitalize()
    shots = total_shots(df,player)
    winning_points = 0

    df_as_server = df[df["serverName"] == player]
    df_as_returner = df[df["serverName"] != player]


    df_as_server_win = df_as_server[(df_as_server["shotInRally"] % 2 == 1) &
                                    (df_as_server[col] == 1)]
    df_as_returner_win = df_as_returner[(df_as_returner["shotInRally"] % 2 == 0) &
                                        (df_as_returner[col] == 1)]
    winning_points += df_as_server_win["isWinner"].count() + df_as_returner_win["isWinner"].count()

    return winning_points

print(f"The number of volley winner by {player_1} is {uniqueShotType_direct_win_num(point_df_eda, player_1, 'volley')}.")
print(f"The number of approach winner by {player_1} is {uniqueShotType_direct_win_num(point_df_eda, player_1, 'approach')}.")
print(f"The number of overhead winner by {player_1} is {uniqueShotType_direct_win_num(point_df_eda, player_1, 'overhead')}.")

The number of volley winner by Spencer Johnson is 1.
The number of approach winner by Spencer Johnson is 1.
The number of overhead winner by Spencer Johnson is 0.


In [18]:
def total_net_points_won(df, player):
    df_as_server = df[df["serverName"] == player]
    df_as_returner = df[df["serverName"] != player]

    total_net_point_won = 0
    for i in range(len(df_as_server)):
      is_net_point = False
      is_point_won = False
      if df_as_server.iloc[i]['isPointStart'] == 1:
        j = i
        while j <= len(df_as_server):
          if df_as_server.iloc[j]['isPointEnd'] == 1:
            break
          j += 1
        for k in range(i, j+1):
          if (df_as_server.iloc[k]['isApproach'] == 1 or
              df_as_server.iloc[k]['isVolley'] == 1 or
              df_as_server.iloc[k]['isOverhead'] == 1) and (df_as_server.iloc[k]['shotInRally'] % 2 == 1):
            is_net_point = True
            break
        if ((df_as_server.iloc[j]['shotInRally'] % 2 == 1 and df_as_server.iloc[j]['isWinner'] == 1) or
          (df_as_server.iloc[j]['shotInRally'] % 2 == 0 and df_as_server.iloc[j]['isError'] == 1)):
          is_point_won = True
        if is_net_point and is_point_won:
          total_net_point_won += 1
    for i in range(len(df_as_returner)):
      is_net_point = False
      is_point_won = False
      if df_as_returner.iloc[i]['isPointStart'] == 1:
        j = i
        while j <= len(df_as_returner):
          if df_as_returner.iloc[j]['isPointEnd'] == 1:
            break
          j += 1
        for k in range(i, j+1):
          if (df_as_returner.iloc[k]['isApproach'] == 1 or
              df_as_returner.iloc[k]['isVolley'] == 1 or
              df_as_returner.iloc[k]['isOverhead'] == 1) and (df_as_returner.iloc[k]['shotInRally'] % 2 == 0):
            is_net_point = True
            break
        if ((df_as_returner.iloc[j]['shotInRally'] % 2 == 0 and df_as_returner.iloc[j]['isWinner'] == 1) or
          (df_as_returner.iloc[j]['shotInRally'] % 2 == 1 and df_as_returner.iloc[j]['isError'] == 1)):
          is_point_won = True
        if is_net_point and is_point_won:
          total_net_point_won += 1

    return total_net_point_won

print(f"Total net points won by {player_1} is : {total_net_points_won(point_df_eda, player_1)}")

Total net points won by Spencer Johnson is : 4


In [19]:
def net_direct_win_per(df, player):
  net_direct_win_per = (uniqueShotType_direct_win_num(df, player, 'volley') + uniqueShotType_direct_win_num(df, player, 'approach') +
          uniqueShotType_direct_win_num(df, player, 'overhead')) / total_net_points_won(df, player)
  return round(100 * net_direct_win_per, 2)

print(f"Net winner percentage of {player_1} is : {net_direct_win_per(point_df_eda, player_1)}%.")

Net winner percentage of Spencer Johnson is : 50.0%.


In [20]:
def net_point_winning_per(df, player):
  return round(100 * total_net_points_won(df, player) / total_points(df), 2)

print(f"Net point winning percentage of {player_1} is : {net_point_winning_per(point_df_eda, player_1)}%.")

Net point winning percentage of Spencer Johnson is : 4.88%.


In [21]:
def net_point_error_per(df, player):
    df_as_server = df[df["serverName"] == player]
    df_as_returner = df[df["serverName"] != player]

    total_net_point_error = 0
    for i in range(len(df_as_server)):
      if (df_as_server.iloc[i]['isPointEnd'] == 1 and
      df_as_server.iloc[i]['shotInRally'] % 2 == 1 and
      (df_as_server.iloc[i]['isVolley'] == 1 or
      df_as_server.iloc[i]['isApproach'] == 1 or
      df_as_server.iloc[i]['isOverhead'] == 1) and
      df_as_server.iloc[i]['isError'] == 1):
        total_net_point_error += 1
      elif (df_as_server.iloc[i]['isPointEnd'] == 1 and
      df_as_server.iloc[i-1]['shotInRally'] % 2 == 1 and
      (df_as_server.iloc[i-1]['isVolley'] == 1 or
      df_as_server.iloc[i-1]['isApproach'] == 1 or
      df_as_server.iloc[i-1]['isOverhead'] == 1) and
      df_as_server.iloc[i]['isWinner'] == 1):
        total_net_point_error += 1
    for i in range(len(df_as_returner)):
      if (df_as_returner.iloc[i]['isPointEnd'] == 1 and
      df_as_returner.iloc[i]['shotInRally'] % 2 == 1 and
      (df_as_returner.iloc[i]['isVolley'] == 1 or
      df_as_returner.iloc[i]['isApproach'] == 1 or
      df_as_returner.iloc[i]['isOverhead'] == 1) and
      df_as_returner.iloc[i]['isError'] == 1):
        total_net_point_error += 1
      elif (df_as_returner.iloc[i]['isPointEnd'] == 1 and
      df_as_returner.iloc[i-1]['shotInRally'] % 2 == 1 and
      (df_as_returner.iloc[i-1]['isVolley'] == 1 or
      df_as_returner.iloc[i-1]['isApproach'] == 1 or
      df_as_returner.iloc[i-1]['isOverhead'] == 1) and
      df_as_returner.iloc[i]['isWinner'] == 1):
        total_net_point_error += 1

    return round(100 * total_net_point_error / total_net_points(df, player), 2)

print(f"The net point error percentage of {player_1} is {net_point_error_per(point_df_eda, player_1)}%.")

The net point error percentage of Spencer Johnson is 28.57%.


In [22]:
# Calculate the passing per for player_1
def passing_per(df, player_1, player_2):
    df_as_server = df[df["serverName"] == player_1]
    df_as_returner = df[df["serverName"] != player_1]

    total_passing = 0
    for i in range(len(df_as_server)):
      if (df_as_server.iloc[i]['isPointEnd'] == 1 and
      df_as_server.iloc[i]['shotInRally'] % 2 == 1 and
      (df_as_server.iloc[i-1]['isVolley'] == 1 or
      df_as_server.iloc[i-1]['isApproach'] == 1 or
      df_as_server.iloc[i-1]['isOverhead'] == 1) and
      df_as_server.iloc[i]['isWinner']):
        total_passing += 1
    for i in range(len(df_as_returner)):
      if (df_as_returner.iloc[i]['isPointEnd'] == 1 and
      df_as_returner.iloc[i]['shotInRally'] % 2 == 0 and
      (df_as_returner.iloc[i-1]['isVolley'] == 1 or
      df_as_returner.iloc[i-1]['isApproach'] == 1 or
      df_as_returner.iloc[i-1]['isOverhead'] == 1) and
      df_as_returner.iloc[i]['isWinner']):
        total_passing += 1

    return round(100 * total_passing / total_net_points(df, player_2), 2)

print(f"The net point error percentage of {player_1} is {passing_per(point_df_eda, player_1, player_2)}%.")

The net point error percentage of Spencer Johnson is 125.0%.


In [23]:
def winner_per(df, player):
    df_as_server = df[df["serverName"] == player_1]
    df_as_returner = df[df["serverName"] != player_1]

    total_winner = 0
    for i in range(len(df_as_server)):
      if (df_as_server.iloc[i]['isWinner'] == 1 and
      df_as_server.iloc[i]['shotInRally'] % 2 == 1):
        total_winner += 1
    for i in range(len(df_as_returner)):
      if (df_as_returner.iloc[i]['isWinner'] == 1 and
      df_as_returner.iloc[i]['shotInRally'] % 2 == 0):
        total_winner += 1

    return round(100 * total_winner / total_points_won(df, player), 2)

print(f"Winner percentage of {player_1} is : {winner_per(point_df_eda, player_1)}%.")

Winner percentage of Spencer Johnson is : 27.78%.


In [24]:
def error_per(df, player):
    df_as_server = df[df["serverName"] == player_1]
    df_as_returner = df[df["serverName"] != player_1]

    total_error = 0
    for i in range(len(df_as_server)):
      if (df_as_server.iloc[i]['isError'] == 1 and
      df_as_server.iloc[i]['shotInRally'] % 2 == 1):
        total_error += 1
    for i in range(len(df_as_returner)):
      if (df_as_returner.iloc[i]['isError'] == 1 and
      df_as_returner.iloc[i]['shotInRally'] % 2 == 0):
        total_error += 1

    return round(100 * total_error / total_points(df), 2)

print(f"Error percentage of {player_1} is : {error_per(point_df_eda, player_1)}%.")

Error percentage of Spencer Johnson is : 26.83%.


In [25]:
# Total number of serves - Janel
def total_serve(df, player):
    df_player = df[df["serverName"] == player]
    return df_player['firstServeIn'].count() + df_player['secondServeIn'].count()

print(f"Total number of serve of {player_1} is : {total_serve(point_df_eda, player_1)}")

Total number of serve of Spencer Johnson is : 60


In [26]:
def pts_won_lte_3_shots_per(df, player):
    df_as_server = df[df["serverName"] == player]
    df_as_returner = df[df["serverName"] != player]

    win_as_server_count = 0
    win_as_returner_count = 0

    for i in range(0, df_as_server.shape[0]):
        if (df_as_server.iloc[i]['isPointEnd'] == 1):
            if (df_as_server.iloc[i]["shotInRally"] % 2 == 1 and
                df_as_server.iloc[i]["shotInRally"] <= 3 and
                (df_as_server.iloc[i]['isWinner'] == 1 or
                 df_as_server.iloc[i]['isAce'] == 1)):
                win_as_server_count += 1
            if (df_as_server.iloc[i]["shotInRally"] % 2 == 0 and
                df_as_server.iloc[i]["shotInRally"] <= 3 and
                df_as_server.iloc[i]['isError'] == 1):
                win_as_server_count += 1

    for j in range(0, df_as_returner.shape[0]):
        if df_as_returner.iloc[j]['isPointEnd'] == 1:
            if (df_as_returner.iloc[j]["shotInRally"] % 2 == 0 and
                df_as_server.iloc[i]["shotInRally"] <= 3 and
                df_as_returner.iloc[j]['isWinner'] == 1):
                win_as_returner_count += 1
            if (df_as_returner.iloc[j]["shotInRally"] % 2 == 1 and
                df_as_server.iloc[i]["shotInRally"] <= 3 and
                df_as_returner.iloc[j]['isError'] == 1):
                win_as_returner_count += 1

    total = win_as_server_count + win_as_returner_count

    return round(100 * total / total_serve(df, player), 2)

print(f"The percentage of serve points won within 3 shots by {player_1} is : {pts_won_lte_3_shots_per(point_df_eda, player_1)}%.")

The percentage of serve points won within 3 shots by Spencer Johnson is : 73.33%.


In [27]:
def shots_in_pts_won_per(df, player):
    df_as_server = df[df["serverName"] == player]
    df_as_returner = df[df["serverName"] != player]

    shots_in_won = 0

    for i in range(0, df_as_server.shape[0]):
        if (df_as_server.iloc[i]['isPointEnd'] == 1):
            if (df_as_server.iloc[i]["shotInRally"] % 2 == 1 and
                (df_as_server.iloc[i]['isWinner'] == 1 or
                 df_as_server.iloc[i]['isAce'] == 1)):
                shots_in_won += df_as_server.iloc[i]["shotInRally"] // 2 + 1
            elif (df_as_server.iloc[i]["shotInRally"] % 2 == 0 and
                df_as_server.iloc[i]['isError'] == 1):
                shots_in_won += df_as_server.iloc[i]["shotInRally"] // 2

    for j in range(0, df_as_returner.shape[0]):
        if df_as_returner.iloc[j]['isPointEnd'] == 1:
            if (df_as_returner.iloc[j]["shotInRally"] % 2 == 0 and
                df_as_returner.iloc[j]['isWinner'] == 1):
                shots_in_won += df_as_returner.iloc[j]["shotInRally"] // 2
            elif (df_as_returner.iloc[j]["shotInRally"] % 2 == 1 and
                df_as_returner.iloc[j]['isError'] == 1):
                shots_in_won += df_as_returner.iloc[j]["shotInRally"] // 2

    return round(100 * shots_in_won / total_shots(df, player), 2)

print(f"The percentage of shots in points lost by {player_1} is : {shots_in_pts_won_per(point_df_eda, player_1)}%.")

The percentage of shots in points lost by Spencer Johnson is : 63.32%.


In [28]:
def shots_in_pts_lost_per(df, player):
    df_as_server = df[df["serverName"] == player]
    df_as_returner = df[df["serverName"] != player]

    shots_in_lost = 0

    for i in range(0, df_as_server.shape[0]):
        if (df_as_server.iloc[i]['isPointEnd'] == 1):
            if (df_as_server.iloc[i]["shotInRally"] % 2 == 0 and
                df_as_server.iloc[i]['isWinner'] == 1):
                shots_in_lost += df_as_server.iloc[i]["shotInRally"] // 2
            elif (df_as_server.iloc[i]["shotInRally"] % 2 == 1 and
                df_as_server.iloc[i]['isError'] == 1):
                shots_in_lost += df_as_server.iloc[i]["shotInRally"] // 2 + 1

    for j in range(0, df_as_returner.shape[0]):
        if df_as_returner.iloc[j]['isPointEnd'] == 1:
            if (df_as_returner.iloc[j]["shotInRally"] % 2 == 1 and
                (df_as_returner.iloc[j]['isWinner'] == 1 or
                 df_as_returner.iloc[j]['isAce'] == 1)):
                shots_in_lost += df_as_returner.iloc[j]["shotInRally"] // 2
            elif (df_as_returner.iloc[j]["shotInRally"] % 2 == 0 and
                df_as_returner.iloc[j]['isError'] == 1):
                shots_in_lost += df_as_returner.iloc[j]["shotInRally"] // 2

    return round(100 * shots_in_lost / total_shots(df, player), 2)

print(f"The percentage of shots in points lost by {player_1} is : {shots_in_pts_lost_per(point_df_eda, player_1)}%.")

The percentage of shots in points lost by Spencer Johnson is : 36.68%.


In [29]:
def shots_in_won_vs_lost_ratio(df, player):
  return round(shots_in_pts_won_per(df, player) / shots_in_pts_lost_per(df, player), 2)

print(f"Shots in points won vs lost ratio by {player_1} is : {shots_in_won_vs_lost_ratio(point_df_eda, player_1)}.")

Shots in points won vs lost ratio by Spencer Johnson is : 1.73.


In [30]:
def inside_in_per(df, player, player_dom_hand):
    # assign a new col to keep track of each inside out forehand
    df['insideInForehand'] = False
    side = 'Ad'

    # change side depending on player dominant hand and opponent dominant hand
    if player_dom_hand != 'right':
        side = 'Deuce'

    # set all inside out forehands to True in the new column we initialized
    df.loc[(df['shotInRally'] != 2) & (df['shotFhBh'] == 'Forehand') &
           (df['shotDirection'] == 'Down the Line') & (df['side'] == side), 'insideInForehand'] = True

    # get only the desired player's shots from the parameter
    player_shots = df[((df['serverName'] == player) & (df['shotInRally'] % 2 != 0) & (df['shotInRally'] != 1)) |
                      ((df['serverName'] != player) & (df['shotInRally'] % 2 == 0))]

    # sum up each direction change the player made divided by their total number of shots
    return round(100 * player_shots['insideInForehand'].sum() / total_shots(df, player), 2)

print(f"The inside in percentage by {player_1} is : {inside_in_per(point_df_eda, player_1, player_1_dom_hand)}%.")

The inside in percentage by Spencer Johnson is : 5.53%.


In [31]:
# inside_out_forehand_num - Derek

def inside_out_per(df, player, player_dom_hand):
    # assign a new col to keep track of each inside out forehand
    df['insideOutForehand'] = False
    side = 'Ad'

    # change side depending on player dominant hand and opponent dominant hand
    if player_dom_hand != 'right':
        side = 'Deuce'

    # set all inside out forehands to True in the new column we initialized
    df.loc[(df['shotInRally'] != 2) & (df['shotFhBh'] == 'Forehand') &
           (df['shotDirection'] == 'Crosscourt') & (df['side'] == side), 'insideOutForehand'] = True

    # get only the desired player's shots from the parameter
    player_shots = df[((df['serverName'] == player) & (df['shotInRally'] % 2 != 0) & (df['shotInRally'] != 1)) |
                      ((df['serverName'] != player) & (df['shotInRally'] % 2 == 0))]

    # sum up each direction change the player made divided by their total number of shots
    return round(100 * player_shots['insideOutForehand'].sum() / total_shots(df, player), 2)

print(f"The inside out percentage by {player_1} is : {inside_out_per(point_df_eda, player_1, player_1_dom_hand)}%.")

The inside out percentage by Spencer Johnson is : 1.51%.


In [32]:
# Data for player 1
forehand_winner_per_1 = shotType_direct_win_per(point_df_eda, player_1, 'forehand')
backhand_winner_per_1 = shotType_direct_win_per(point_df_eda, player_1, 'backhand')
net_per_1 = net_per(point_df_eda, player_1)
net_point_direct_win_per_1 = net_direct_win_per(point_df_eda, player_1)
net_point_winning_per_1 = net_point_winning_per(point_df_eda, player_1)
net_point_error_1 = net_point_error_per(point_df_eda, player_1)
passing_per_1 = passing_per(point_df_eda, player_1, player_2)
winner_per_1 = winner_per(point_df_eda, player_1)
err_per_1 = error_per(point_df_eda, player_1)
pts_won_Ite_3_shots_per_1 = pts_won_lte_3_shots_per(point_df_eda, player_1)
shots_in_pts_won_per_1 = shots_in_pts_won_per(point_df_eda, player_1)
shots_in_pts_lost_per_1 = shots_in_pts_lost_per(point_df_eda, player_1)
shots_in_won_vs_lost_ratio_1 = shots_in_won_vs_lost_ratio(point_df_eda, player_1)
inside_in_per_1 = inside_in_per(point_df_eda, player_1, player_1_dom_hand)
inside_out_per_1 = inside_out_per(point_df_eda, player_1, player_1_dom_hand)
print(f"Forehand Winner Percentage (Player 1): {forehand_winner_per_1}%")
print(f"Backhand Winner Percentage (Player 1): {backhand_winner_per_1}%")
print(f"Net Percentage (Player 1): {net_per_1}%")
print(f"Net Point Direct Win Percentage (Player 1): {net_point_direct_win_per_1}%")
print(f"Net Point Winning Percentage (Player 1): {net_point_winning_per_1}%")
print(f"Net Point Error Percentage (Player 1): {net_point_error_1}%")
print(f"Passing Percentage (Player 1): {passing_per_1}%")
print(f"Winner Percentage (Player 1): {winner_per_1}%")
print(f"Error Percentage (Player 1): {err_per_1}%")
print(f"Points Won <= 3 Shots Percentage (Player 1): {pts_won_Ite_3_shots_per_1}%")
print(f"Shots in Points Won Percentage (Player 1): {shots_in_pts_won_per_1}%")
print(f"Shots in Points Lost Percentage (Player 1): {shots_in_pts_lost_per_1}%")
print(f"Shots in Won vs Lost Ratio (Player 1): {shots_in_won_vs_lost_ratio_1}%")
print(f"Inside-In Percentage (Player 1): {inside_in_per_1}%")
print(f"Inside-Out Percentage (Player 1): {inside_out_per_1}%")

Forehand Winner Percentage (Player 1): 6.1%
Backhand Winner Percentage (Player 1): 4.88%
Net Percentage (Player 1): 8.54%
Net Point Direct Win Percentage (Player 1): 50.0%
Net Point Winning Percentage (Player 1): 4.88%
Net Point Error Percentage (Player 1): 28.57%
Passing Percentage (Player 1): 125.0%
Winner Percentage (Player 1): 27.78%
Error Percentage (Player 1): 26.83%
Points Won <= 3 Shots Percentage (Player 1): 73.33%
Shots in Points Won Percentage (Player 1): 63.32%
Shots in Points Lost Percentage (Player 1): 36.68%
Shots in Won vs Lost Ratio (Player 1): 1.73%
Inside-In Percentage (Player 1): 5.53%
Inside-Out Percentage (Player 1): 1.51%


In [33]:
# Data for player 2
forehand_winner_per_2 = shotType_direct_win_per(point_df_eda, player_2, 'forehand')
backhand_winner_per_2 = shotType_direct_win_per(point_df_eda, player_2, 'backhand')
net_per_2 = net_per(point_df_eda, player_2)
net_point_direct_win_per_2 = net_direct_win_per(point_df_eda, player_2)
net_point_winning_per_2 = net_point_winning_per(point_df_eda, player_2)
net_point_error_2 = net_point_error_per(point_df_eda, player_2)
passing_per_2 = passing_per(point_df_eda, player_2, player_1)
winner_per_2 = winner_per(point_df_eda, player_2)
err_per_2 = error_per(point_df_eda, player_2)
pts_won_Ite_3_shots_per_2 = pts_won_lte_3_shots_per(point_df_eda, player_2)
shots_in_pts_won_per_2 = shots_in_pts_won_per(point_df_eda, player_2)
shots_in_pts_lost_per_2 = shots_in_pts_lost_per(point_df_eda, player_2)
shots_in_won_vs_lost_ratio_2 = shots_in_won_vs_lost_ratio(point_df_eda, player_2)
inside_in_per_2 = inside_in_per(point_df_eda, player_2, player_2_dom_hand)
inside_out_per_2 = inside_out_per(point_df_eda, player_2, player_2_dom_hand)
print(f"Forehand Winner Percentage (Player 2): {forehand_winner_per_2}%")
print(f"Backhand Winner Percentage (Player 2): {backhand_winner_per_2}%")
print(f"Net Percentage (Player 2): {net_per_2}%")
print(f"Net Point Direct Win Percentage (Player 2): {net_point_direct_win_per_2}%")
print(f"Net Point Winning Percentage (Player 2): {net_point_winning_per_2}%")
print(f"Net Point Error Percentage (Player 2): {net_point_error_2}%")
print(f"Passing Percentage (Player 2): {passing_per_2}%")
print(f"Winner Percentage (Player 2): {winner_per_2}%")
print(f"Error Percentage (Player 2): {err_per_2}%")
print(f"Points Won <= 3 Shots Percentage (Player 2): {pts_won_Ite_3_shots_per_2}%")
print(f"Shots in Points Won Percentage (Player 2): {shots_in_pts_won_per_2}%")
print(f"Shots in Points Lost Percentage (Player 2): {shots_in_pts_lost_per_2}%")
print(f"Shots in Won vs Lost Ratio (Player 2): {shots_in_won_vs_lost_ratio_2}%")
print(f"Inside-In Percentage (Player 2): {inside_in_per_2}%")
print(f"Inside-Out Percentage (Player 2): {inside_out_per_2}%")

Forehand Winner Percentage (Player 2): 6.1%
Backhand Winner Percentage (Player 2): 4.88%
Net Percentage (Player 2): 4.88%
Net Point Direct Win Percentage (Player 2): 50.0%
Net Point Winning Percentage (Player 2): 2.44%
Net Point Error Percentage (Player 2): 50.0%
Passing Percentage (Player 2): 28.57%
Winner Percentage (Player 2): 53.57%
Error Percentage (Player 2): 26.83%
Points Won <= 3 Shots Percentage (Player 2): 13.46%
Shots in Points Won Percentage (Player 2): 33.0%
Shots in Points Lost Percentage (Player 2): 67.0%
Shots in Won vs Lost Ratio (Player 2): 0.49%
Inside-In Percentage (Player 2): 10.0%
Inside-Out Percentage (Player 2): 3.5%


In [34]:
# Data for Player 1
player_1_data = {
    "player": "Player 1",
    "forehand_winner_per": forehand_winner_per_1,
    "backhand_winner_per": backhand_winner_per_1,
    "net_per": net_per_1,
    "net_point_direct_win_per": net_point_direct_win_per_1,
    "net_point_winning_per": net_point_winning_per_1,
    "net_point_error": net_point_error_1,
    "passing_per": passing_per_1,
    "winner_per": winner_per_1,
    "err_per": err_per_1,
    "pts_won_Ite_3_shots_per": pts_won_Ite_3_shots_per_1,
    "shots_in_pts_won_per": shots_in_pts_won_per_1,
    "shots_in_pts_lost_per": shots_in_pts_lost_per_1,
    "shots_in_won_vs_lost_ratio": shots_in_won_vs_lost_ratio_1,
    "inside_in_per": inside_in_per_1,
    "inside_out_per": inside_out_per_1
}

# Data for Player 2
player_2_data = {
    "player": "Player 2",
    "forehand_winner_per": forehand_winner_per_2,
    "backhand_winner_per": backhand_winner_per_2,
    "net_per": net_per_2,
    "net_point_direct_win_per": net_point_direct_win_per_2,
    "net_point_winning_per": net_point_winning_per_2,
    "net_point_error": net_point_error_2,
    "passing_per": passing_per_2,
    "winner_per": winner_per_2,
    "err_per": err_per_2,
    "pts_won_Ite_3_shots_per": pts_won_Ite_3_shots_per_2,
    "shots_in_pts_won_per": shots_in_pts_won_per_2,
    "shots_in_pts_lost_per": shots_in_pts_lost_per_2,
    "shots_in_won_vs_lost_ratio": shots_in_won_vs_lost_ratio_2,
    "inside_in_per": inside_in_per_2,
    "inside_out_per": inside_out_per_2
}

# Create DataFrame with one row for each player
columns = [
    "player", "forehand_winner_per", "backhand_winner_per", "net_per",
    "net_point_direct_win_per", "net_point_winning_per", "net_point_error",
    "passing_per", "winner_per", "err_per",
    "pts_won_Ite_3_shots_per", "shots_in_pts_won_per", "shots_in_pts_lost_per",
    "shots_in_won_vs_lost_ratio", "inside_in_per", "inside_out_per"
]

data = [player_1_data, player_2_data]
school_df = pd.DataFrame(data, columns=columns)

# Display the DataFrame
school_df

Unnamed: 0,player,forehand_winner_per,backhand_winner_per,net_per,net_point_direct_win_per,net_point_winning_per,net_point_error,passing_per,winner_per,err_per,pts_won_Ite_3_shots_per,shots_in_pts_won_per,shots_in_pts_lost_per,shots_in_won_vs_lost_ratio,inside_in_per,inside_out_per
0,Player 1,6.1,4.88,8.54,50.0,4.88,28.57,125.0,27.78,26.83,73.33,63.32,36.68,1.73,5.53,1.51
1,Player 2,6.1,4.88,4.88,50.0,2.44,50.0,28.57,53.57,26.83,13.46,33.0,67.0,0.49,10.0,3.5


In [35]:
# Load the saved model
model_path = '/content/drive/My Drive/FIT/Work_Space_Shot_Quality_Metric/Machine_Learning/rf_regressor_model.joblib'
rf_regressor = joblib.load(model_path)

In [36]:
school_df[school_df['player'] == 'Player 1']

Unnamed: 0,player,forehand_winner_per,backhand_winner_per,net_per,net_point_direct_win_per,net_point_winning_per,net_point_error,passing_per,winner_per,err_per,pts_won_Ite_3_shots_per,shots_in_pts_won_per,shots_in_pts_lost_per,shots_in_won_vs_lost_ratio,inside_in_per,inside_out_per
0,Player 1,6.1,4.88,8.54,50.0,4.88,28.57,125.0,27.78,26.83,73.33,63.32,36.68,1.73,5.53,1.51


In [37]:
# Selecting feature columns and target columns based on the provided instructions
feature_columns = [
    'forehand_winner_per', 'backhand_winner_per', 'net_per', 'net_point_direct_win_per', 'net_point_winning_per',
    'net_point_error', 'passing_per', 'winner_per', 'err_per', 'pts_won_Ite_3_shots_per', 'shots_in_pts_won_per',
    'shots_in_pts_lost_per', 'shots_in_won_vs_lost_ratio', 'inside_in_per', 'inside_out_per'
]

target_columns = ['Counter Puncher', 'Attacking Baseliner', 'All-Court Player', 'Solid Baseliner']

# Select the feature columns for Player 1
player_1_X = school_df[school_df['player'] == 'Player 1'][feature_columns]
player_1_pred = rf_regressor.predict(player_1_X)
player_1_predictions = dict(zip(target_columns, player_1_pred[0]))
print(f"Player 1 prediction: {player_1_predictions}.")

# Select the feature columns for Player 2
player_2_X = school_df[school_df['player'] == 'Player 2'][feature_columns]
player_2_pred = rf_regressor.predict(player_2_X)
player_2_predictions = dict(zip(target_columns, player_2_pred[0]))
print(f"Player 2 prediction: {player_2_predictions}.")

Player 1 prediction: {'Counter Puncher': 0.22590605009320533, 'Attacking Baseliner': -0.11051656030159678, 'All-Court Player': 0.9084532060530861, 'Solid Baseliner': -0.747131563111233}.
Player 2 prediction: {'Counter Puncher': 0.19992805568631322, 'Attacking Baseliner': -0.13082237679098976, 'All-Court Player': 0.9308370816804711, 'Solid Baseliner': -0.7286751273363039}.


# Big Server Score

In [38]:
# Total first serve - Jimmy
def total_first_serve(df, player):
  df_player = df[df["serverName"] == player]
  total_fs = df_player['firstServeIn'].count()
  return total_fs

print(f"Total first serve of {player_1} is : {total_first_serve(point_df_eda, player_1)}")

Total first serve of Spencer Johnson is : 44


In [39]:
# Function to calculate the percentage of the feature,
# e.g. percentage of firstServeIn = number of firstServeIn / total number of the first served by player A

#first serve in - Janel
def first_serve_ratio(df, player):
    df_player = df[df["serverName"] == player]
    total_fs = total_first_serve(df, player)
    serves_in = df_player['firstServeIn'].sum()
    per = 100 * serves_in / total_fs
    return per

print(f"Percentage of first_serve_in of {player_1} is : {first_serve_ratio(point_df_eda, player_1):.2f}%")

Percentage of first_serve_in of Spencer Johnson is : 63.64%


In [40]:
# Total second serve - Jimmy
def total_second_serve(df, player):
  df_player = df[df["serverName"] == player]
  total_ss = df_player['secondServeIn'].count()
  return total_ss

print(f"Total second serve of {player_1} is : {total_second_serve(point_df_eda, player_1)}")

Total second serve of Spencer Johnson is : 16


In [41]:
#second serve in - Janel
def second_serve_ratio(df, player):
    df_player = df[df["serverName"] == player]
    total_ss = total_second_serve(df, player)
    serves_in = df_player['secondServeIn'].sum()
    per = 100 * serves_in / total_ss
    return per

print(f"Percentage of second_serve_in of {player_1} is : {second_serve_ratio(point_df_eda, player_1):.2f}%")

Percentage of second_serve_in of Spencer Johnson is : 100.00%


In [42]:
#Ace_per - Janel
def Ace_per(df, player):
    totalServe = total_serve(df, player)
    df_player = df[df["serverName"] == player]
    totalAce = df_player['isAce'].sum()
    per = 100 * totalAce / totalServe
    return per

print(f"Ace percentage of {player_1} is : {Ace_per(point_df_eda, player_1):.2f}%")

Ace percentage of Spencer Johnson is : 5.00%


In [43]:
# First serve ace percentage - Tony
def first_serve_ace_per(df, player):
    total_fs = total_first_serve(df, player)
    df_player = df[df["serverName"] == player]
    df_fs_in = df_player[df_player["firstServeIn"] == 1]
    totalAce = df_fs_in['isAce'].sum()
    per = 100 * totalAce / total_fs
    return per

print(f"First Serve Ace percentage of {player_1} is : {first_serve_ace_per(point_df_eda, player_1):.2f}%")

First Serve Ace percentage of Spencer Johnson is : 6.82%


In [44]:
# Second serve ace percentage - Tony
def second_serve_ace_per(df, player):
    total_ss = total_second_serve(df, player)
    df_player = df[df["serverName"] == player]
    df_ss_in = df_player[df_player["secondServeIn"] == 1]
    totalAce = df_ss_in['isAce'].sum()
    per = 100 * totalAce / total_ss
    return per

print(f"Second Serve Ace percentage of {player_1} is : {second_serve_ace_per(point_df_eda, player_1):.2f}%")

Second Serve Ace percentage of Spencer Johnson is : 0.00%


In [45]:
# firstServe_unreturned - Tony
def firstServe_unreturned(df, player):
    points_served = df[df['serverName'] == player]
    firstServe_unreturned = 0

    for i in range(1, len(points_served)):
        if (points_served.iloc[i]['isPointEnd'] == 1 and
            points_served.iloc[i]['shotInRally'] == 2 and
            points_served.iloc[i]['isError'] == 1 and
            points_served.iloc[i - 1]['firstServeIn'] == 1):
            firstServe_unreturned += 1

    return firstServe_unreturned

print(f"First Serve Unreturned of {player_1}:", firstServe_unreturned(df=point_df_eda, player=player_1))

First Serve Unreturned of Spencer Johnson: 8


In [46]:
# secondServe_unreturned - Tony
def secondServe_unreturned(df, player):
    points_served = df[df['serverName'] == player]
    secondServe_unreturned = 0

    for i in range(1, len(points_served)):
        if (points_served.iloc[i]['isPointEnd'] == 1 and
            points_served.iloc[i]['shotInRally'] == 2 and
            points_served.iloc[i]['isError'] == 1 and
            points_served.iloc[i - 1]['secondServeIn'] == 1):
            secondServe_unreturned += 1

    return secondServe_unreturned

print(f"Second Serve Unreturned of {player_1}:", secondServe_unreturned(df=point_df_eda, player=player_1))

Second Serve Unreturned of Spencer Johnson: 4


In [47]:
# firstServe_plusOne - Tony
def firstServe_plusOne(df, player):
    points_served = df[df['serverName'] == player]
    firstServe_plusOne = 0

    for i in range(1, len(points_served)):
        if ((points_served.iloc[i]['isPointEnd'] == 1 and
             points_served.iloc[i]['shotInRally'] == 3 and
             points_served.iloc[i]['isWinner'] == 1 and
             points_served.iloc[i - 2]['firstServeIn'] == 1) or
            (points_served.iloc[i]['isPointEnd'] == 1 and
             points_served.iloc[i]['shotInRally'] == 4 and
             points_served.iloc[i]['isError'] == 1 and
             points_served.iloc[i - 3]['firstServeIn'] == 1)):
            firstServe_plusOne += 1

    return firstServe_plusOne

print(f"First Serve Plus One of {player_1}:", firstServe_plusOne(df=point_df_eda, player=player_1))

First Serve Plus One of Spencer Johnson: 7


In [48]:
# secondServe_plusOne - Tony
def secondServe_plusOne(df, player):
    points_served = df[df['serverName'] == player]
    secondServe_plusOne = 0

    for i in range(1, len(points_served)):
        if ((points_served.iloc[i]['isPointEnd'] == 1 and
             points_served.iloc[i]['shotInRally'] == 3 and
             points_served.iloc[i]['isWinner'] == 1 and
             points_served.iloc[i - 2]['secondServeIn'] == 1) or
            (points_served.iloc[i]['isPointEnd'] == 1 and
             points_served.iloc[i]['shotInRally'] == 4 and
             points_served.iloc[i]['isError'] == 1 and
             points_served.iloc[i - 3]['secondServeIn'] == 1)):
            secondServe_plusOne += 1

    return secondServe_plusOne

print(f"Second Serve Plus One of {player_1}:", secondServe_plusOne(df=point_df_eda, player=player_1))

Second Serve Plus One of Spencer Johnson: 3


In [49]:
# First serve quality score - Jimmy
def first_serve_quality_score(df, player):
  fs_ace_per = first_serve_ace_per(df, player)
  total_fs = total_first_serve(df, player)
  unreturned_fs_per = 100 * firstServe_unreturned(df, player) / total_fs
  fs_plusOne_per = 100 * firstServe_plusOne(df, player) / total_fs
  return fs_ace_per + unreturned_fs_per + fs_plusOne_per

player = "Alexander Hoogmartens"  # or Isac Stromberg
print(f"First serve quality score of {player_1} is : {first_serve_quality_score(point_df_eda, player_1):.2f}%")

First serve quality score of Spencer Johnson is : 40.91%


In [50]:
# Second serve quality score - Jimmy
def second_serve_quality_score(df, player):
  ss_ace_per = second_serve_ace_per(df, player)
  total_ss = total_second_serve(df, player)
  unreturned_ss_per = 100 * secondServe_unreturned(df, player) / total_ss
  ss_plusOne_per = 100 * secondServe_plusOne(df, player) / total_ss
  return ss_ace_per + unreturned_ss_per + ss_plusOne_per

print(f"Second serve quality score of {player_1} is : {second_serve_quality_score(point_df_eda, player_1):.2f}%")

Second serve quality score of Spencer Johnson is : 43.75%


In [51]:
# Serve quality score - Jimmy
def serve_quality_score(df, player):
  total_fs = total_first_serve(df, player)
  total_ss = total_second_serve(df, player)
  totalServe = total_serve(df, player)
  fs_per = total_fs / totalServe
  ss_per = total_ss / totalServe
  fs_score = first_serve_quality_score(df, player)
  ss_score = second_serve_quality_score(df, player)
  return fs_per * fs_score + ss_per * ss_score

print(f"Serve quality score of {player_1} is : {serve_quality_score(point_df_eda, player_1):.2f}")

Serve quality score of Spencer Johnson is : 41.67


# Server and Volleyer

In [52]:
# Number of points won by first serve plus a net shot - Tyler
def win_firstServe_plusOne_netShot(df, player):
    points_served = df[df['serverName'] == player]
    firstServe_plusOne_netShot = 0

    for i in range(1, len(points_served)):
        if ((points_served.iloc[i]['isPointEnd'] == 1 and
             points_served.iloc[i]['shotInRally'] == 3 and
             points_served.iloc[i]['isWinner'] == 1 and
             points_served.iloc[i - 2]['firstServeIn'] == 1 and
             (points_served.iloc[i]['isVolley'] == 1 or
              points_served.iloc[i]['isOverhead'] == 1 or
              points_served.iloc[i]['isApproach'] == 1 )) or
            (points_served.iloc[i]['isPointEnd'] == 1 and
             points_served.iloc[i]['shotInRally'] == 4 and
             points_served.iloc[i]['isError'] == 1 and
             points_served.iloc[i - 3]['firstServeIn'] == 1 and
             (points_served.iloc[i]['isVolley'] == 1 or
              points_served.iloc[i]['isOverhead'] == 1 or
              points_served.iloc[i]['isApproach'] == 1 ))):
            firstServe_plusOne_netShot += 1

    return firstServe_plusOne_netShot

print(f"{player_1} won {win_firstServe_plusOne_netShot(point_df_eda, player_1)} points by first serve plus a net shot.")

Spencer Johnson won 1 points by first serve plus a net shot.


In [53]:
# Number of points won by second serve plus a net shot - Tyler
def win_secondServe_plusOne_netShot(df, player):
    points_served = df[df['serverName'] == player]
    secondServe_plusOne_netShot = 0

    for i in range(1, len(points_served)):
        if ((points_served.iloc[i]['isPointEnd'] == 1 and
             points_served.iloc[i]['shotInRally'] == 3 and
             points_served.iloc[i]['isWinner'] == 1 and
             points_served.iloc[i - 2]['secondServeIn'] == 1 and
             (points_served.iloc[i]['isVolley'] == 1 or
              points_served.iloc[i]['isOverhead'] == 1 or
              points_served.iloc[i]['isApproach'] == 1 )) or
            (points_served.iloc[i]['isPointEnd'] == 1 and
             points_served.iloc[i]['shotInRally'] == 4 and
             points_served.iloc[i]['isError'] == 1 and
             points_served.iloc[i]['isVolley'] == 1 and
             points_served.iloc[i - 3]['secondServeIn'] == 1 and
             (points_served.iloc[i]['isVolley'] == 1 or
              points_served.iloc[i]['isOverhead'] == 1 or
              points_served.iloc[i]['isApproach'] == 1 ))):
            secondServe_plusOne_netShot += 1

    return secondServe_plusOne_netShot

print(f"{player_1} won {win_secondServe_plusOne_netShot(point_df_eda, player_1)} points by second serve plus a net shot.")

Spencer Johnson won 0 points by second serve plus a net shot.


In [54]:
# Number of first serve plus a net shot - Jimmy
def firstServe_plusOne_netShot(df, player):
    points_served = df[df['serverName'] == player]
    firstServe_plusOne_netShot = 0

    for i in range(1, len(points_served)):
        if (points_served.iloc[i]['isPointEnd'] == 1 and
             points_served.iloc[i]['shotInRally'] == 3 and
             points_served.iloc[i - 2]['firstServeIn'] == 1 and
             (points_served.iloc[i]['isVolley'] == 1 or
              points_served.iloc[i]['isOverhead'] == 1 or
              points_served.iloc[i]['isApproach'] == 1)):
            firstServe_plusOne_netShot += 1

    return firstServe_plusOne_netShot

print(f"{player_1} hit {firstServe_plusOne_netShot(point_df_eda, player_1)} first serve plus a net shot.")

Spencer Johnson hit 2 first serve plus a net shot.


In [55]:
# Number of first serve plus a net shot - Jimmy
def secondServe_plusOne_netShot(df, player):
    points_served = df[df['serverName'] == player]
    secondServe_plusOne_netShot = 0

    for i in range(1, len(points_served)):
        if (points_served.iloc[i]['isPointEnd'] == 1 and
             points_served.iloc[i]['shotInRally'] == 3 and
             points_served.iloc[i - 2]['secondServeIn'] == 1 and
             (points_served.iloc[i]['isVolley'] == 1 or
              points_served.iloc[i]['isOverhead'] == 1 or
              points_served.iloc[i]['isApproach'] == 1)):
            secondServe_plusOne_netShot += 1

    return secondServe_plusOne_netShot

print(f"{player_1} hit {secondServe_plusOne_netShot(point_df_eda, player_1)} second serve plus a net shot.")

Spencer Johnson hit 0 second serve plus a net shot.


In [56]:
# Serve and volley percentage = 100 * total_serve_and_volley / total_serve - Jimmy
def serve_and_volley_per(df, player):
    total_serve_and_volley = firstServe_plusOne_netShot(df, player) + secondServe_plusOne_netShot(df, player)
    return 100 * total_serve_and_volley / total_serve(df, player)

print(f"The serve and volley percentage for {player_1} is {serve_and_volley_per(point_df_eda, player_1):.2f}%.")

The serve and volley percentage for Spencer Johnson is 3.33%.


In [57]:
# Serve and volley success percentage = 100 * total_points_won_serve_and_volley / total_points_won - Jimmy
def serve_and_volley_success_per(df, player):
    total_win_serve_and_volley = win_firstServe_plusOne_netShot(df, player) + win_secondServe_plusOne_netShot(df, player)
    return 100 * total_win_serve_and_volley / total_points_won(df, player)

print(f"The serve and volley success percentage for {player_1} is {serve_and_volley_success_per(point_df_eda, player_1):.2f}%.")

The serve and volley success percentage for Spencer Johnson is 1.85%.


In [58]:
# serve and volley score = Serve and volley percentage * 0.5 + Serve and volley success percentage * 0.5 - Jimmy
def serve_and_volley_score(df, player):
    return serve_and_volley_per(df, player) * 0.5 + serve_and_volley_success_per(df, player)

print(f"The serve and volley score for {player_1} is {serve_and_volley_score(point_df_eda, player_1):.2f}.")

The serve and volley score for Spencer Johnson is 3.52.


# Finalize and Plot

In [59]:
player_1_bs_score = serve_quality_score(point_df_eda, player_1)
player_1_sv_score = serve_and_volley_score(point_df_eda, player_1)
player_2_bs_score = serve_quality_score(point_df_eda, player_2)
player_2_sv_score = serve_and_volley_score(point_df_eda, player_2)

# Add the new scores to the predictions for Player 1
player_1_predictions["Big Server"] = player_1_bs_score
player_1_predictions["Serve and Volley"] = player_1_sv_score

# Add the new scores to the predictions for Player 2
player_2_predictions["Big Server"] = player_2_bs_score
player_2_predictions["Serve and Volley"] = player_2_sv_score

# Display the updated dictionaries
print("Player 1 Predictions:", player_1_predictions)
print("Player 2 Predictions:", player_2_predictions)

Player 1 Predictions: {'Counter Puncher': 0.22590605009320533, 'Attacking Baseliner': -0.11051656030159678, 'All-Court Player': 0.9084532060530861, 'Solid Baseliner': -0.747131563111233, 'Big Server': 41.666666666666664, 'Serve and Volley': 3.5185185185185186}
Player 2 Predictions: {'Counter Puncher': 0.19992805568631322, 'Attacking Baseliner': -0.13082237679098976, 'All-Court Player': 0.9308370816804711, 'Solid Baseliner': -0.7286751273363039, 'Big Server': 19.23076923076923, 'Serve and Volley': 0.0}


In [60]:
# Extract labels and values
labels = list(player_1_predictions.keys())
values = list(player_1_predictions.values())

# Close the circle by appending the first value
values += values[:1]
labels += labels[:1]

# Create radar chart using Plotly
fig = go.Figure()

fig.add_trace(go.Scatterpolar(
    r=values,
    theta=labels,
    fill='toself',
    name="Player Profile",
    fillcolor='rgba(0, 0, 255, 0.2)',  # Blue fill with transparency
    line=dict(color='blue')
))

# Customize layout
fig.update_layout(
    polar=dict(
        radialaxis=dict(visible=True, range=[0, 100], tickangle=0),
    ),
    title=dict(
        text=player_1,
        x=0.5,
        font=dict(size=18)
    ),
    showlegend=False
)

# Display the plot
fig.show()

In [61]:
# Extract labels and values
labels = list(player_2_predictions.keys())
values = list(player_2_predictions.values())

# Close the circle by appending the first value
values += values[:1]
labels += labels[:1]

# Create radar chart using Plotly
fig = go.Figure()

fig.add_trace(go.Scatterpolar(
    r=values,
    theta=labels,
    fill='toself',
    name="Player Profile",
    fillcolor='rgba(0, 0, 255, 0.2)',  # Blue fill with transparency
    line=dict(color='blue')
))

# Customize layout
fig.update_layout(
    polar=dict(
        radialaxis=dict(visible=True, range=[0, 100], tickangle=0),
    ),
    title=dict(
        text=player_2,
        x=0.5,
        font=dict(size=18)
    ),
    showlegend=False
)

# Display the plot
fig.show()