In [6]:
import pandas as pd

file_path = 'C:\\Users\\User\\OneDrive\\Desktop\\python_developer\\partizan_2022\\data\\partizan_2022_cleaned.csv'

df = pd.read_csv(file_path)

### Play-off vs Regular Season

Compare player performances between the regular season and playoffs to identify who steps up or struggles under postseason pressure.

In [2]:
# filter 
players = df[df['player'] != 'PARTIZAN MOZZART BET BELGRADE'].copy()
team = df[df['player'] == 'PARTIZAN MOZZART BET BELGRADE'].copy()

In [4]:
df['phase'].value_counts()

phase
REGULAR SEASON    437
PLAYOFFS           63
Name: count, dtype: int64

In [5]:
import pandas as pd

# Assuming 'phase' column exists ('regular season' or 'playoffs')
# If not, let me know how to identify phases (e.g., game dates)

# Split data
regular_season = players[players['phase'] == 'REGULAR SEASON'].copy()
playoffs = players[players['phase'] == 'PLAYOFFS'].copy()

# Function to calculate all metrics
def calculate_metrics(df, team_df):
    # PPP
    df['field_goals_attempted'] = df['two_points_attempted'] + df['three_points_attempted']
    df['possessions_used'] = df['field_goals_attempted'] + 0.44 * df['free_throws_attempted'] + df['turnovers']
    df['points_per_possession'] = df['points'] / df['possessions_used']
    df['points_per_possession'] = df['points_per_possession'].fillna(0).replace([float('inf'), -float('inf')], 0)

    # TS%
    df['ts_percentage'] = df['points'] / (2 * (df['field_goals_attempted'] + 0.44 * df['free_throws_attempted']))
    df['ts_percentage'] = df['ts_percentage'].fillna(0).replace([float('inf'), -float('inf')], 0)

    # Pts/32
    df['points_per_32'] = (df['points'] / df['total_seconds']) * 1920
    df['points_per_32'] = df['points_per_32'].fillna(0).replace([float('inf'), -float('inf')], 0)

    # AST/TO (per-game average)
    df['ast_to_ratio'] = df.apply(
        lambda row: row['assists'] if row['turnovers'] == 0 else row['assists'] / row['turnovers'], 
        axis=1
    )

    # USG%
    df = df.merge(team_df[['game', 'field_goals_attempted', 'free_throws_attempted', 'turnovers', 'total_seconds']], 
                  on='game', suffixes=('', '_team'))
    game_seconds = 2400
    df['usage_rate'] = 100 * ((df['field_goals_attempted'] + 0.44 * df['free_throws_attempted'] + df['turnovers']) /
                              (df['field_goals_attempted_team'] + 0.44 * df['free_throws_attempted_team'] + df['turnovers_team'])) * \
                             (df['total_seconds'] / game_seconds)
    df['usage_rate'] = df['usage_rate'].fillna(0).replace([float('inf'), -float('inf')], 0)

    # REB%
    df = df.merge(team_df[['game', 'total_rebounds']], on='game', suffixes=('', '_team_reb'))
    df['rebound_rate'] = 100 * (df['total_rebounds'] / df['total_rebounds_team']) * (df['total_seconds'] / game_seconds)
    df['rebound_rate'] = df['rebound_rate'].fillna(0).replace([float('inf'), -float('inf')], 0)

    return df

# Calculate metrics for each phase
team_regular = team[team['phase'] == 'regular season'].copy()
team_playoffs = team[team['phase'] == 'playoffs'].copy()
regular_season = calculate_metrics(regular_season, team_regular)
playoffs = calculate_metrics(playoffs, team_playoffs)

# Aggregate by player
metrics = ['points_per_possession', 'ts_percentage', 'points_per_32', 'ast_to_ratio', 'usage_rate', 'rebound_rate']
reg_avg = regular_season.groupby('player')[metrics].mean()
play_avg = playoffs.groupby('player')[metrics].mean()

# Combine and calculate differences
combined = reg_avg.join(play_avg, lsuffix='_reg', rsuffix='_play', how='outer').fillna(0)
for metric in metrics:
    combined[f'{metric}_diff'] = combined[f'{metric}_play'] - combined[f'{metric}_reg']

# Top 5 by PPP difference (playoffs - regular)
print("Top 5 Players by PPP Difference (Playoffs - Regular Season):")
print(combined[['points_per_possession_reg', 'points_per_possession_play', 'points_per_possession_diff']]
      .sort_values('points_per_possession_diff', ascending=False).head(5))

# Check: Validate Punter Kevin
player_to_check = 'Punter Kevin'
for phase, df in [('Regular Season', regular_season), ('Playoffs', playoffs)]:
    check_player = df[df['player'] == player_to_check]
    if not check_player.empty:
        ppp_avg = check_player['points_per_possession'].mean()
        raw_means = check_player[['points', 'field_goals_attempted', 'free_throws_attempted', 'turnovers']].mean()
        manual_ppp = raw_means['points'] / (raw_means['field_goals_attempted'] + 0.44 * raw_means['free_throws_attempted'] + raw_means['turnovers']) if (raw_means['field_goals_attempted'] + 0.44 * raw_means['free_throws_attempted'] + raw_means['turnovers']) > 0 else 0
        print(f"\nCheck for {player_to_check} - {phase}:")
        print(f"Computed PPP: {ppp_avg:.4f}")
        print(f"Manual PPP: {manual_ppp:.4f}")
        print(f"Raw Averages - Points: {raw_means['points']:.2f}, FGA: {raw_means['field_goals_attempted']:.2f}, "
              f"FTA: {raw_means['free_throws_attempted']:.2f}, TO: {raw_means['turnovers']:.2f}")

KeyError: "['field_goals_attempted'] not in index"