In [209]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load data
file_path = "../data/partizan_2022_cleaned.csv"
df = pd.read_csv(file_path)

players = df[df['player'] != 'PARTIZAN MOZZART BET BELGRADE'].copy()
team = df[df['player'] == 'PARTIZAN MOZZART BET BELGRADE'].copy()

# Filter players and team
players_1 = df[df['player'] != 'PARTIZAN MOZZART BET BELGRADE'].copy()
team_1 = df[df['player'] == 'PARTIZAN MOZZART BET BELGRADE'].copy()


players_1['field_goals_attempted'] = players_1['two_points_attempted'] + players_1['three_points_attempted']
team_1['field_goals_attempted'] = team_1['two_points_attempted'] + team_1['three_points_attempted']



In [210]:
# Copy the DataFrame for the next step
players_3 = players_1.copy()


# Calculate TS% (does not depend on team stats)
players_3['ts_percentage'] = players_3['points'] / (2 * (players_3['field_goals_attempted'] + 0.44 * players_3['free_throws_attempted']))
players_3['ts_percentage'] = players_3['ts_percentage'].fillna(0).replace([float('inf'), -float('inf')], 0)

# Debug: Check TS% for a few players
print("TS% for first 5 rows:")
print(players_3[['player', 'ts_percentage']].head(5))

TS% for first 5 rows:
         player  ts_percentage
0  Punter Kevin       0.868984
1  Punter Kevin       0.729167
2  Punter Kevin       0.522193
3  Punter Kevin       0.570342
4  Punter Kevin       0.696721


In [211]:
# Copy the DataFrame for the next step
players_4 = players_1.copy()

# Calculate PP32 (does not depend on team stats)
players_4['points_per_32'] = (players_4['points'] / players_4['total_seconds']) * 1920  # 1920 seconds = 32 minutes
players_4['points_per_32'] = players_4['points_per_32'].fillna(0).replace([float('inf'), -float('inf')], 0)

# Debug: Check PP32 for a few players
print("PP32 for first 5 rows:")
print(players_4[['player', 'points_per_32']].head(5))

PP32 for first 5 rows:
         player  points_per_32
0  Punter Kevin      24.375000
1  Punter Kevin      24.347826
2  Punter Kevin      17.524244
3  Punter Kevin      16.587473
4  Punter Kevin      17.133858


In [212]:
# Copy the DataFrame for the next step
players_5 = players_1.copy()

# Calculate AST/TO per game (does not depend on team stats)
players_5['ast_to_ratio'] = players_5.apply(lambda row: row['assists'] if row['turnovers'] == 0 else row['assists'] / row['turnovers'], axis=1)

# Debug: Check AST/TO for a few players
print("AST/TO for first 5 rows:")
print(players_5[['player', 'ast_to_ratio']].head(5))

AST/TO for first 5 rows:
         player  ast_to_ratio
0  Punter Kevin           3.0
1  Punter Kevin           1.5
2  Punter Kevin           6.0
3  Punter Kevin           2.0
4  Punter Kevin           3.0


In [213]:
# calculate total shot attempts because of merging below
players['field_goals_attempted'] = players['two_points_attempted'] + players['three_points_attempted']
team['field_goals_attempted'] = team['two_points_attempted'] + team['three_points_attempted']


# merge team stats to players
players = players.merge(team[['game', 'field_goals_attempted', 'free_throws_attempted', 'turnovers', 'total_seconds']], 
                        on='game', suffixes=('', '_team')).reset_index(drop=True)


# calculate Usage Rate
game_seconds = 2400  # 40 minutes * 60 seconds
players['usage_rate'] = 100 * ((players['field_goals_attempted'] + 0.44 * players['free_throws_attempted'] + players['turnovers']) /
                               (players['field_goals_attempted_team'] + 0.44 * players['free_throws_attempted_team'] + players['turnovers_team'])) * \
                              (players['total_seconds'] / game_seconds)
     
                                       
# handle division by zero (players with no team stats)                              
players['usage_rate'] = players['usage_rate'].fillna(0).replace([float('inf'), -float('inf')], 0)


# average USG% per player
avg_usg = players.groupby('player')['usage_rate'].mean().sort_values(ascending=False)


print("Top 5 Players by Usage Rate (USG%):")
print(avg_usg.head(5))

Top 5 Players by Usage Rate (USG%):
player
Punter Kevin       13.644535
Leday Zach         11.260251
Lessort Mathias     9.040352
Exum Dante          8.773679
Nunnally James      6.643273
Name: usage_rate, dtype: float64


In [214]:
# Copy the DataFrame for the next step
players_7 = players_1.copy()


# Calculate PPP
players_7['possessions_used'] = players_7['field_goals_attempted'] + 0.44 * players_7['free_throws_attempted'] + players_7['turnovers']
players_7['points_per_possession'] = players_7['points'] / players_7['possessions_used']
players_7['points_per_possession'] = players_7['points_per_possession'].fillna(0).replace([float('inf'), -float('inf')], 0)

# Debug: Check PPP for a few players
print("PPP for first 5 rows:")
print(players_7[['player', 'points_per_possession']].head(5))

PPP for first 5 rows:
         player  points_per_possession
0  Punter Kevin               1.533019
1  Punter Kevin               1.320755
2  Punter Kevin               1.044386
3  Punter Kevin               1.041667
4  Punter Kevin               1.287879


In [216]:
# calculate Rebound Rate
# formula: Rebound Rate = 100 * (Player Total Rebounds / Team Total Rebounds) * (Player Total Seconds / game_seconds)
# Rebound Rate (REB%): Percentage of team rebounds secured, adjusted for playing time
# approximates rebounding impact using team totals as a proxy for available rebounds.

# merge team stats to players 
players_1 = players_1.merge(team[['game', 'total_rebounds']], on='game', suffixes=('', '_team'))

# reset index to avoid duplicate labels
players_1 = players_1.reset_index(drop=True)


# calculate Rebound Rate
game_seconds = 2400  # 40 minutes * 60 seconds
players_1['rebound_rate'] = 100 * (players_1['total_rebounds'] / players_1['total_rebounds_team']) * (players_1['total_seconds'] / game_seconds)


# handle division by zero (players with no team stats)
players_1['rebound_rate'] = players_1['rebound_rate'].fillna(0).replace([float('inf'), -float('inf')], 0)

# average Rebound Rate per player
avg_rr = players_1.groupby('player')['rebound_rate'].mean().sort_values(ascending=False)

print("Top 5 Players by Rebound Rate:") 
print(avg_rr.head(5))

Top 5 Players by Rebound Rate:
player
Lessort Mathias    16.606726
Leday Zach         12.978675
Punter Kevin        5.821043
Nunnally James      4.828153
Exum Dante          4.804690
Name: rebound_rate, dtype: float64


In [217]:
# Compute averages
avg_ts = players_3.groupby('player')['ts_percentage'].mean().sort_values(ascending=False)
avg_pp32 = players_4.groupby('player')['points_per_32'].mean().sort_values(ascending=False)
avg_ast_to = players_5.groupby('player')['ast_to_ratio'].mean().sort_values(ascending=False)
avg_ppp = players_7.groupby('player')['points_per_possession'].mean().sort_values(ascending=False)


# Print top 5 for each metric
print("Top 5 Players by True Shooting Percentage (TS%):")
print(avg_ts.head(5))
print("\nTop 5 Players by Points per 32 Minutes (PP32):")
print(avg_pp32.head(5))
print("\nTop 5 Players by Assist-to-Turnover Ratio (AST/TO):")
print(avg_ast_to.head(5))
print("\nTop 5 Players by Usage Rate (USG%):")
print(avg_usg.head(5))
print("\nTop 5 Players by Points per Possession (PPP):")
print(avg_ppp.head(5))
print("\nTop 5 Players by Rebound Rate (REB%):")
print(avg_rr.head(5))



Top 5 Players by True Shooting Percentage (TS%):
player
Lessort Mathias    0.721486
Exum Dante         0.671916
Punter Kevin       0.623184
Smailagic Alen     0.619905
Leday Zach         0.587875
Name: ts_percentage, dtype: float64

Top 5 Players by Points per 32 Minutes (PP32):
player
Punter Kevin        18.341875
Exum Dante          18.070395
Smailagic Alen      17.346076
Lessort Mathias     12.758641
Avramovic Aleksa    12.523459
Name: points_per_32, dtype: float64

Top 5 Players by Assist-to-Turnover Ratio (AST/TO):
player
Exum Dante          1.857456
Punter Kevin        1.844144
Nunnally James      1.401316
Madar Yam           1.391026
Avramovic Aleksa    1.201389
Name: ast_to_ratio, dtype: float64

Top 5 Players by Usage Rate (USG%):
player
Punter Kevin       13.644535
Leday Zach         11.260251
Lessort Mathias     9.040352
Exum Dante          8.773679
Nunnally James      6.643273
Name: usage_rate, dtype: float64

Top 5 Players by Points per Possession (PPP):
player
Lessort Mat