In [1]:
# !pip install plotly jupyterlab

In [5]:
import pandas as pd

import plotly.express as px
import plotly.io as pio
# Default plotly renderer used by every fig.show() call in this notebook.
# The 'notebook' renderer is kept below as a commented-out alternative.
# pio.renderers.default = 'notebook'
pio.renderers.default = 'iframe'

In [21]:
# Season to analyse and the directory that holds the per-season CSV exports.
year = 2022
# Machine-specific absolute path, kept in one place so it only has to be
# changed once when the notebook is run elsewhere.
DATA_DIR = '/Users/cb/src/nba_mvp_ml/data/processed/by_season'

player_stats = pd.read_csv(f'{DATA_DIR}/stats_{year}.csv')
team_stats = pd.read_csv(f'{DATA_DIR}/team_stats_{year}.csv')

# Identifying columns keep their names; every other team column gets a
# '_team' suffix so it cannot be confused with a player-level column.
TEAM_KEY_COLUMNS = ['SEASON_ID', 'LEAGUE_ID', 'TEAM_ID', 'TEAM_ABBREVIATION']
team_stats = team_stats.rename(
    columns={col: f"{col}_team" for col in team_stats.columns if col not in TEAM_KEY_COLUMNS}
)

# The lookup below is keyed on TEAM_ID, so each team must appear exactly once.
# Checking here gives a clear message instead of a failure inside Series.map.
assert team_stats['TEAM_ID'].is_unique, "team_stats has duplicate TEAM_ID rows"

# Match team stats using TEAM_ID without merging: build the TEAM_ID-indexed
# lookup once, then copy the selected team columns onto a copy of the player
# table. TEAM_ID values missing from team_stats come out as NaN.
team_by_id = team_stats.set_index('TEAM_ID')
player_team_stats = player_stats.copy()
for team_col in ['PTS_PG_team', 'TS%_team']:
    player_team_stats[team_col] = player_team_stats['TEAM_ID'].map(team_by_id[team_col])

In [22]:
# Interactive scatterplot: each player's points per game (x) against the
# true-shooting percentage of their team (y), coloured by team abbreviation.
# 'PTS_PG_team' and 'TS%_team' are already attached to player_team_stats by
# the data-loading cell, so they are not recomputed here.
fig = px.scatter(
    player_team_stats,
    x='PTS_PG',
    y='TS%_team',
    color='TEAM_ABBREVIATION',
    hover_data=['PLAYER_FULLNAME', 'PTS_PG', 'TS%_team', 'TEAM_ABBREVIATION'],
    title='Player Scoring vs Team Efficiency'
)
fig.update_layout(
    xaxis_title='Player Points Per Game (PTS_PG)',
    yaxis_title='Team True Shooting Percentage (TS%_team)',
)
fig.show()

In [14]:
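# # Add Win Percentage to player stats
# # NOTE(review): this cell is disabled. The lookup frame is `team_stats` (the original
# # text used `team_stats_2023`, which no visible cell defines).
# player_team_stats['Win_Pct_team'] = player_team_stats['TEAM_ID'].map(team_stats.set_index('TEAM_ID')['Win_Pct_team'])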

# # Top 10 players by Win Shares
# top_ws_players = player_team_stats.nlargest(10, 'WS')

# # Interactive barplot
# fig = px.bar(
#     top_ws_players,
#     x='WS',
#     y='PLAYER_FULLNAME',
#     color='Win_Pct_team',
#     orientation='h',
#     hover_data=['PLAYER_FULLNAME', 'TEAM_ABBREVIATION', 'Win_Pct_team', 'WS'],
#     title='Top Players by Win Shares and Team Success'
# )
# fig.update_layout(xaxis_title='Win Shares (WS)', yaxis_title='Player')
# fig.show()

In [23]:
# Scatter of scoring efficiency (x) against scoring volume (y), one point per
# row of player_stats, coloured by team abbreviation.
efficiency_col = 'TS%'
volume_col = 'PTS_PG'
fig = px.scatter(
    player_stats,
    x=efficiency_col,
    y=volume_col,
    color='TEAM_ABBREVIATION',
    hover_data=['PLAYER_FULLNAME', volume_col, efficiency_col, 'TEAM_ABBREVIATION'],
    title='Scoring Efficiency vs Scoring Volume',
)
# Replace the raw column names on the axes with readable titles.
fig.update_layout(
    xaxis_title='True Shooting Percentage (TS%)',
    yaxis_title='Points Per Game (PTS_PG)',
)
fig.show()

In [16]:
# Team-level scatter: pace on x, team points per game on y, coloured by
# TEAM_ABBREVIATION.
# NOTE(review): the title says "Offensive Efficiency" but the y column is
# PTS_PG_team (points per game) — confirm the wording is intended.
pace_col = 'Pace_team'
points_col = 'PTS_PG_team'
abbrev_col = 'TEAM_ABBREVIATION'
fig = px.scatter(
    team_stats,
    x=pace_col,
    y=points_col,
    color=abbrev_col,
    hover_data=[abbrev_col, pace_col, points_col],
    title='Team Pace vs Offensive Efficiency',
)
# Replace the raw column names on the axes with readable titles.
fig.update_layout(
    xaxis_title='Pace (Team)',
    yaxis_title='Points Per Game (PTS_PG_team)',
)
fig.show()

In [20]:
# Flag a hand-picked example list of players as MVP candidates.
# Names are upper-cased before matching against PLAYER_FULLNAME.
# NOTE(review): diacritics are preserved ('JOKIĆ', 'DONČIĆ') — confirm the CSV
# spells these names the same way, otherwise those rows will not be flagged.
MVP_CANDIDATE_NAMES = {
    name.upper()
    for name in (
        'Nikola Jokić',
        'Giannis Antetokounmpo',
        'Shai Gilgeous-Alexander',
        'Luka Dončić',
    )
}

# Build the candidate set once and match with vectorised isin() instead of
# recreating the list for every row; the flag stays a 1/0 integer column.
player_stats['MVP_Candidate'] = (
    player_stats['PLAYER_FULLNAME'].isin(MVP_CANDIDATE_NAMES).astype(int)
)

# plotly express treats a numeric colour column as continuous (colour bar).
# Cast the flag to string in a plotting-only copy so the two groups get
# discrete colours and legend entries; player_stats keeps the integer column.
mvp_plot_data = player_stats.assign(
    MVP_Candidate=player_stats['MVP_Candidate'].astype(str)
)

# Interactive scatterplot for PER vs WS
fig = px.scatter(
    mvp_plot_data,
    x='PER',
    y='WS',
    color='MVP_Candidate',
    hover_data=['PLAYER_FULLNAME', 'PER', 'WS', 'TEAM_ABBREVIATION'],
    title='PER vs WS: MVP Candidates vs Others'
)
fig.update_layout(
    xaxis_title='Player Efficiency Rating (PER)',
    yaxis_title='Win Shares (WS)',
)
fig.show()