In [None]:
import pandas as pd

file_path = 'C:\\Users\\User\\OneDrive\\Desktop\\python_developer\\partizan_2022\\data\\partizan_2022_cleaned.csv'

df = pd.read_csv(file_path) 

### Efficiency metrics

1. True Shooting Percentage (TS%)
2. Points per 32 Minutes (close to typical starter's minutes)
3. Assist-to-Turnover Ratio (AST/TO)
4. Usage Rate (USG%)
5. Points per possession (PPP) 
6. Rebound Rate (REB%)

In [157]:
# filter 
players = df[df['player'] != 'PARTIZAN MOZZART BET BELGRADE'].copy()
team = df[df['player'] == 'PARTIZAN MOZZART BET BELGRADE'].copy()

In [145]:
# calculate True Shooting Percentage (TS%)
# formula: TS% = Points / (2 * (FGA + 0.44 * FTA)), where FGA = 2PA + 3PA
players['field_goals_attempted'] = players['two_points_attempted'] + players['three_points_attempted']
players['ts_percentage'] = players['points'] / (2 * (players['field_goals_attempted'] + 0.44 * players['free_throws_attempted']))

# handle division by zero (players with no shot attempts)
players['ts_percentage'] = players['ts_percentage'].fillna(0).replace([float('inf'), -float('inf')], 0)

# average TS% per player
avg_ts = players.groupby('player')['ts_percentage'].mean().sort_values(ascending=False)


print("Top 5 Players by True Shooting Percentage (TS%):")
print(avg_ts.head())


Top 5 Players by True Shooting Percentage (TS%):
player
Lessort Mathias    0.721486
Exum Dante         0.671916
Punter Kevin       0.623184
Smailagic Alen     0.619905
Leday Zach         0.587875
Name: ts_percentage, dtype: float64


In [146]:
# calculate Points per 32 Minutes (close to typical starter's minutes)
# formula: (Points / Total Seconds) * 1920 (where 1920 seconds = 32 minutes)
players['points_per_32'] = (players['points'] / players['total_seconds']) * 1920

# handle division by zero (players with 0 seconds)
players['points_per_32'] = players['points_per_32'].fillna(0).replace([float('inf'), -float('inf')], 0)

# average points per 32 minutes per player
avg_points_per_32 = players.groupby('player')['points_per_32'].mean().sort_values(ascending=False)


avg_points_per_32 = avg_points_per_32.reset_index()
avg_points_per_32.columns = ['player', 'points_per_32']
avg_points_per_32.set_index('player', inplace=True)

avg_points_per_32.head()

Unnamed: 0_level_0,points_per_32
player,Unnamed: 1_level_1
Punter Kevin,18.341875
Exum Dante,18.070395
Smailagic Alen,17.346076
Lessort Mathias,12.758641
Avramovic Aleksa,12.523459


In [147]:
# calculate Assist-to-Turnover Ratio (AST/TO)
players['ast_to_ratio'] = players.apply(
    lambda row: row['assists'] if row['turnovers'] == 0 else row['assists'] / row['turnovers'], 
    axis=1
)

# handle players with 0 turnovers if needed
players['ast_to_ratio'] = players['ast_to_ratio'].replace([float('inf'), -float('inf')], 0)

# average AST/TO per player
avg_ast_to = players.groupby('player')['ast_to_ratio'].mean().sort_values(ascending=False)

print("Top 5 Players by Assist-to-Turnover Ratio (AST/TO):")
print(avg_ast_to.head())

Top 5 Players by Assist-to-Turnover Ratio (AST/TO):
player
Exum Dante          1.857456
Punter Kevin        1.844144
Nunnally James      1.401316
Madar Yam           1.391026
Avramovic Aleksa    1.201389
Name: ast_to_ratio, dtype: float64


Usage Rate is a fantastic metric because it shows how much of a team’s offensive action a player is responsible for when they’re on the court

In [148]:
# calculate Usage Rate (USG%)
# Usage Rate (USG%): Percentage of team possessions used by a player, adjusted for playing time, calculated as:
# formula: USG% = 100 * ((FGA + 0.44 * FTA + TO) / (FGA_team + 0.44 * FTA_team + TO_team)) * (TS / 2400)


# calculate total shot attempts because of merging below
players['field_goals_attempted'] = players['two_points_attempted'] + players['three_points_attempted']
team['field_goals_attempted'] = team['two_points_attempted'] + team['three_points_attempted']


# merge team stats to players
players = players.merge(team[['game', 'field_goals_attempted', 'free_throws_attempted', 'turnovers', 'total_seconds']], 
                        on='game', suffixes=('', '_team'))

# calculate Usage Rate
game_seconds = 2400  # 40 minutes * 60 seconds
players['usage_rate'] = 100 * ((players['field_goals_attempted'] + 0.44 * players['free_throws_attempted'] + players['turnovers']) /
                               (players['field_goals_attempted_team'] + 0.44 * players['free_throws_attempted_team'] + players['turnovers_team'])) * \
                              (players['total_seconds'] / game_seconds)
     
                                       
# handle division by zero (players with no team stats)                              
players['usage_rate'] = players['usage_rate'].fillna(0).replace([float('inf'), -float('inf')], 0)

# average USG% per player
avg_usg = players.groupby('player')['usage_rate'].mean().sort_values(ascending=False)


print("Top 5 Players by Usage Rate (USG%):")
print(avg_usg.head(5))

Top 5 Players by Usage Rate (USG%):
player
Punter Kevin       13.644535
Leday Zach         11.260251
Lessort Mathias     9.040352
Exum Dante          8.773679
Nunnally James      6.643273
Name: usage_rate, dtype: float64


In [171]:
# points per possession (PPP)
# Scoring efficiency per possession used, highlighting players who maximize points from their offensive opportunities.
# formula: PPP = Points / (FGA + 0.44 * FTA + TO)
players['field_goals_attempted'] = players['two_points_attempted'] + players['three_points_attempted']
players['possessions_used'] = players['field_goals_attempted'] + 0.44 * players['free_throws_attempted'] + players['turnovers']
players['points_per_possession'] = players['points'] / players['possessions_used']


# handle division by zero (players with no possessions used)
players['points_per_possession'] = players['points_per_possession'].fillna(0).replace([float('inf'), -float('inf')], 0)


# average PPP per player
avg_PPP = players.groupby('player')['points_per_possession'].mean().sort_values(ascending=False)


print("Top 5 Players by Points per Possession (PPP):")
print(avg_PPP.head(5))

Top 5 Players by Points per Possession (PPP):
player
Lessort Mathias    1.232044
Exum Dante         1.146356
Punter Kevin       1.126225
Smailagic Alen     1.123254
Leday Zach         1.062734
Name: points_per_possession, dtype: float64


In [170]:
print(players[players['player'] == 'Smailagic Alen']['points_per_possession'].mean())

1.1232538908978038


In [151]:
# Recalculate PPP fresh
players['field_goals_attempted'] = players['two_points_attempted'] + players['three_points_attempted']
players['possessions_used'] = players['field_goals_attempted'] + 0.44 * players['free_throws_attempted'] + players['turnovers']
players['points_per_possession'] = players['points'] / players['possessions_used']
players['points_per_possession'] = players['points_per_possession'].fillna(0).replace([float('inf'), -float('inf')], 0)

# Average PPP per player
avg_PPP = players.groupby('player')['points_per_possession'].mean().sort_values(ascending=False)
print("Top 5 Players by Points per Possession (PPP):")
print(avg_PPP.head(5))

# Confirm Smailagic
print("\nSmailagic Alen PPP Check:")
print(players[players['player'] == 'Smailagic Alen']['points_per_possession'].mean())

Top 5 Players by Points per Possession (PPP):
player
Lessort Mathias    1.235191
Smailagic Alen     1.193479
Punter Kevin       1.159680
Exum Dante         1.149233
Leday Zach         1.073341
Name: points_per_possession, dtype: float64

Smailagic Alen PPP Check:
1.1934786790599137


In [152]:
# calculate Rebound Rate
# formula: Rebound Rate = 100 * (Player Total Rebounds / Team Total Rebounds) * (Player Total Seconds / game_seconds)
# Rebound Rate (REB%): Percentage of team rebounds secured, adjusted for playing time

# merge team stats to players 
players = players.merge(team[['game', 'total_rebounds']], on='game', suffixes=('', '_team'))


# calculate Rebound Rate
game_seconds = 2400  # 40 minutes * 60 seconds
players['rebound_rate'] = 100 * (players['total_rebounds'] / players['total_rebounds_team']) * (players['total_seconds'] / game_seconds)


# handle division by zero (players with no team stats)
players['rebound_rate'] = players['rebound_rate'].fillna(0).replace([float('inf'), -float('inf')], 0)

# average Rebound Rate per player
avg_rr = players.groupby('player')['rebound_rate'].mean().sort_values(ascending=False)

print("Top 5 Players by Rebound Rate:") 
print(avg_rr.head(5))


Top 5 Players by Rebound Rate:
player
Lessort Mathias    14.053764
Leday Zach         12.765595
Punter Kevin        6.894055
Nunnally James      5.106059
Exum Dante          4.971516
Name: rebound_rate, dtype: float64
