# How many teams had 0, 1, 2, or 3+ star players each season from 2021–2025?

In [None]:
# Count, per season, how many teams fall into each "star player" tier
# (0, 1, 2, or 3+ stars). A star is a player row with PER >= 20,
# win shares >= 5, and box plus/minus >= 2.
#
# Stars are tallied with conditional aggregation over *every* (team, season)
# present in the stats table. Filtering down to star rows first and grouping
# afterwards drops teams that have no stars at all, which makes the
# '0 Stars' tier impossible to reach.
#
# NOTE(review): no season filter is applied; presumably the table only holds
# the 2021-2025 seasons -- confirm, or add a WHERE clause on season.
# NOTE(review): if the table contains aggregate pseudo-team rows for traded
# players (e.g. a combined-totals code), they are counted as teams -- verify.
sql_api_descriptive = '''
WITH star_counts AS (
  SELECT
    team,
    season,
    -- Rows failing (or having NULL in) any threshold contribute 0, so a
    -- team-season without stars still yields a row with a count of 0.
    SUM(
      CASE
        WHEN per >= 20
         AND win_shares >= 5
         AND box_plus_minus >= 2
        THEN 1
        ELSE 0
      END
    ) AS num_star_players
  FROM public.nba_api_player_stats
  GROUP BY team, season
),
star_category AS (
  SELECT
    team,
    season,
    CASE
      WHEN num_star_players = 0 THEN '0 Stars'
      WHEN num_star_players = 1 THEN '1 Star'
      WHEN num_star_players = 2 THEN '2 Stars'
      ELSE '3+ Stars'
    END AS star_tier
  FROM star_counts
)
SELECT
  season,
  star_tier,
  COUNT(*) AS num_teams
FROM star_category
GROUP BY season, star_tier
ORDER BY season, star_tier;
'''

df_api = pd.read_sql(sql_api_descriptive, con=engine)
# Show every row of the result instead of pandas' truncated preview.
pd.set_option('display.max_rows', None)
df_api


# Are star players clustered on high-win teams, or spread out?

In [None]:
# Count star players (PER >= 20, win shares >= 5, box plus/minus >= 2) by the
# winning-percentage tier of the team they played for, to see whether stars
# cluster on winning teams.
#
# The scraped standings identify teams by full name while the player stats
# use abbreviations, so an inline VALUES table bridges the two.
#
# NOTE(review): the joins are INNER joins, so a star whose team abbreviation
# or season has no match in the standings is silently left out of the counts.
# Verify that the abbreviations below match the codes actually stored in
# nba_api_player_stats and that the scraped team names match exactly.
sql_api_diagnostic = '''
WITH team_name_map AS (
  SELECT * FROM (VALUES
    ('ATL', 'Atlanta Hawks'),
    ('BOS', 'Boston Celtics'),
    ('BRK', 'Brooklyn Nets'),
    ('CHA', 'Charlotte Hornets'),
    ('CHI', 'Chicago Bulls'),
    ('CLE', 'Cleveland Cavaliers'),
    ('DAL', 'Dallas Mavericks'),
    ('DEN', 'Denver Nuggets'),
    ('DET', 'Detroit Pistons'),
    ('GSW', 'Golden State Warriors'),
    ('HOU', 'Houston Rockets'),
    ('IND', 'Indiana Pacers'),
    ('LAC', 'Los Angeles Clippers'),
    ('LAL', 'Los Angeles Lakers'),
    ('MEM', 'Memphis Grizzlies'),
    ('MIA', 'Miami Heat'),
    ('MIL', 'Milwaukee Bucks'),
    ('MIN', 'Minnesota Timberwolves'),
    ('NOP', 'New Orleans Pelicans'),
    ('NYK', 'New York Knicks'),
    ('OKC', 'Oklahoma City Thunder'),
    ('ORL', 'Orlando Magic'),
    ('PHI', 'Philadelphia 76ers'),
    ('PHO', 'Phoenix Suns'),
    ('POR', 'Portland Trail Blazers'),
    ('SAC', 'Sacramento Kings'),
    ('SAS', 'San Antonio Spurs'),
    ('TOR', 'Toronto Raptors'),
    ('UTA', 'Utah Jazz'),
    ('WAS', 'Washington Wizards')
  ) AS t(abbrev, full_name)
),
team_wins AS (
  SELECT
    m.abbrev AS team,
    ws."Season" AS season,
    -- NULLIF turns a 0-game record into NULL instead of a division error.
    CAST(ws."Wins" AS FLOAT)
      / NULLIF(ws."Wins" + ws."Losses", 0) AS win_pct
  FROM public.web_scraped_standings ws
  JOIN team_name_map m ON ws."Team" = m.full_name
),
star_players AS (
  SELECT
    player, team, season
  FROM public.nba_api_player_stats
  WHERE per >= 20 AND win_shares >= 5 AND box_plus_minus >= 2
),
joined AS (
  SELECT
    sp.player,
    sp.season,
    tw.win_pct
  FROM star_players sp
  JOIN team_wins tw ON sp.team = tw.team AND sp.season = tw.season
  -- A team with no recorded games has no meaningful tier; skip it rather
  -- than let it fall through to the low-winning bucket.
  WHERE tw.win_pct IS NOT NULL
),
tiered AS (
  SELECT
    win_pct,
    CASE
      WHEN win_pct >= 0.6 THEN 'High Winning Teams (≥.600)'
      WHEN win_pct >= 0.5 THEN 'Mid-tier Teams (.500–.599)'
      ELSE 'Low Winning Teams (<.500)'
    END AS win_tier
  FROM joined
)
SELECT
  win_tier,
  COUNT(*) AS num_star_players
FROM tiered
GROUP BY win_tier
-- Tiers cover disjoint win_pct ranges, so ordering by each tier's minimum
-- lists them deterministically from high to low.
ORDER BY MIN(win_pct) DESC;
'''

df_api_diagnostic = pd.read_sql(sql_api_diagnostic, con=engine)
# Show every row of the result instead of pandas' truncated preview.
pd.set_option('display.max_rows', None)
df_api_diagnostic
