# NBA Player Stats Predictor Demo

This notebook demonstrates the key features of the NBA Player Stats Predictor:
- **Player Performance Analysis** - Historical stats and trends
- **ML Predictions** - XGBoost-based predictions for PTS, AST, REB
- **Rolling Averages** - Recent form analysis

**Live Dashboard:** [Hugging Face Space](https://huggingface.co/spaces/kingzman20/nba-player-predictor)

In [1]:
import sys
sys.path.insert(0, '..')

import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
from datetime import datetime

# Load pre-computed data
df = pd.read_parquet('../data/engineered_data.parquet')
print(f"Loaded {len(df):,} game records for {df['PLAYER_NAME'].nunique()} players")
print(f"Seasons: {df['SEASON'].unique().tolist()}")

Loaded 68,922 game records for 761 players
Seasons: ['2023-24', '2024-25', '2025-26']


## Top Players by Points Per Game (2024-25 Season)

In [2]:
# Get current season stats
current_season = df[df['SEASON'] == '2024-25'].copy()

# Calculate season averages
season_avgs = current_season.groupby('PLAYER_NAME').agg({
    'PTS': 'mean',
    'AST': 'mean', 
    'REB': 'mean',
    'MIN': 'mean',
    'FG_PCT': 'mean',
    'GAME_DATE': 'count'
}).rename(columns={'GAME_DATE': 'GP'}).round(1)

# Filter players with at least 20 games
season_avgs = season_avgs[season_avgs['GP'] >= 20].sort_values('PTS', ascending=False)

# Top 15 scorers
top_scorers = season_avgs.head(15)

fig = go.Figure(go.Bar(
    x=top_scorers['PTS'],
    y=top_scorers.index,
    orientation='h',
    marker_color='#14b8a6',
    text=top_scorers['PTS'].apply(lambda x: f'{x:.1f}'),
    textposition='outside'
))

fig.update_layout(
    title='Top 15 Scorers - 2024-25 Season',
    xaxis_title='Points Per Game',
    yaxis_title='',
    template='plotly_dark',
    height=500,
    yaxis={'categoryorder': 'total ascending'}
)
fig.show()

## Player Deep Dive: Performance Over Time

Let's analyze a star player's performance trends.

In [3]:
# Select a player to analyze
PLAYER = "LeBron James"  # Change this to any player

player_df = df[df['PLAYER_NAME'] == PLAYER].sort_values('_date').copy()
player_current = player_df[player_df['SEASON'] == '2024-25']

print(f"\n{'='*50}")
print(f"  {PLAYER} - 2024-25 Season Stats")
print(f"{'='*50}")
print(f"  Games Played: {len(player_current)}")
print(f"  PPG: {player_current['PTS'].mean():.1f}")
print(f"  APG: {player_current['AST'].mean():.1f}")
print(f"  RPG: {player_current['REB'].mean():.1f}")
print(f"  MPG: {player_current['MIN'].mean():.1f}")
print(f"  FG%: {player_current['FG_PCT'].mean()*100:.1f}%")
print(f"{'='*50}")


  LeBron James - 2024-25 Season Stats
  Games Played: 70
  PPG: 24.4
  APG: 8.2
  RPG: 7.8
  MPG: 35.0
  FG%: 51.1%


In [4]:
# Points trend with rolling average
fig = go.Figure()

# Individual game points
fig.add_trace(go.Scatter(
    x=player_current['_date'],
    y=player_current['PTS'],
    mode='markers+lines',
    name='Points',
    line=dict(color='#14b8a6', width=1),
    marker=dict(size=8)
))

# 5-game rolling average
if 'rolling_avg_pts_5' in player_current.columns:
    fig.add_trace(go.Scatter(
        x=player_current['_date'],
        y=player_current['rolling_avg_pts_5'],
        mode='lines',
        name='5-Game Avg',
        line=dict(color='#f97066', width=3, dash='dash')
    ))

# Season average line
season_avg = player_current['PTS'].mean()
fig.add_hline(y=season_avg, line_dash="dot", line_color="white", 
              annotation_text=f"Season Avg: {season_avg:.1f}")

fig.update_layout(
    title=f"{PLAYER} - Points Per Game (2024-25)",
    xaxis_title='Date',
    yaxis_title='Points',
    template='plotly_dark',
    height=400,
    legend=dict(orientation='h', yanchor='bottom', y=1.02)
)
fig.show()

## Last 10 Games Performance

In [5]:
# Last 10 games
last_10 = player_current.tail(10).copy()

# Create grouped bar chart
fig = go.Figure()

# Format dates for display
last_10['game_label'] = last_10['MATCHUP'].str.split(' ').str[-1] + '<br>' + last_10['_date'].dt.strftime('%m/%d')

fig.add_trace(go.Bar(
    name='Points',
    x=last_10['game_label'],
    y=last_10['PTS'],
    marker_color='#14b8a6',
    text=last_10['PTS'].astype(int),
    textposition='outside'
))

fig.add_trace(go.Bar(
    name='Assists',
    x=last_10['game_label'],
    y=last_10['AST'],
    marker_color='#f97066',
    text=last_10['AST'].astype(int),
    textposition='outside'
))

fig.add_trace(go.Bar(
    name='Rebounds',
    x=last_10['game_label'],
    y=last_10['REB'],
    marker_color='#a78bfa',
    text=last_10['REB'].astype(int),
    textposition='outside'
))

fig.update_layout(
    title=f"{PLAYER} - Last 10 Games",
    xaxis_title='',
    yaxis_title='Stats',
    barmode='group',
    template='plotly_dark',
    height=400,
    legend=dict(orientation='h', yanchor='bottom', y=1.02)
)
fig.show()

In [6]:
# Last 10 games table
display_cols = ['GAME_DATE', 'MATCHUP', 'PTS', 'AST', 'REB', 'MIN', 'FG_PCT']
last_10_display = last_10[display_cols].copy()
last_10_display['FG_PCT'] = (last_10_display['FG_PCT'] * 100).round(1).astype(str) + '%'
last_10_display = last_10_display.sort_values('GAME_DATE', ascending=False)
last_10_display

Unnamed: 0,GAME_DATE,MATCHUP,PTS,AST,REB,MIN,FG_PCT
44448,"Mar 31, 2025",LAL vs. HOU,16,4,8,38,41.7%
44447,"Mar 29, 2025",LAL @ MEM,25,8,6,37,47.6%
44446,"Mar 27, 2025",LAL @ CHI,17,12,5,39,40.0%
44445,"Mar 26, 2025",LAL @ IND,13,7,13,38,33.3%
44454,"Apr 11, 2025",LAL vs. HOU,14,8,4,22,54.5%
44453,"Apr 09, 2025",LAL @ DAL,27,3,7,36,55.0%
44452,"Apr 08, 2025",LAL @ OKC,28,3,7,35,42.1%
44451,"Apr 06, 2025",LAL @ OKC,19,7,3,34,56.3%
44450,"Apr 04, 2025",LAL vs. NOP,27,8,0,33,52.9%
44449,"Apr 03, 2025",LAL vs. GSW,33,9,5,40,66.7%


## ML Predictions

Using XGBoost models trained on historical data with 50+ features including:
- Rolling averages (5, 10, 20 games)
- Home/Away performance
- Rest days
- Opponent defensive rating
- Minutes trend

In [7]:
from models.predictor import NBAPredictor
import os

# Load predictors
models_dir = '../models'
predictors = {}

for stat in ['pts', 'ast', 'reb']:
    path = os.path.join(models_dir, f'{stat}_predictor.pkl')
    if os.path.exists(path):
        predictors[stat.upper()] = NBAPredictor.load(path)
        print(f"Loaded {stat.upper()} predictor")

print(f"\nModels loaded: {list(predictors.keys())}")

Loaded PTS predictor (MAE: 4.67)
Loaded PTS predictor
Loaded AST predictor (MAE: 1.25)
Loaded AST predictor
Loaded REB predictor (MAE: 2.22)
Loaded REB predictor

Models loaded: ['PTS', 'AST', 'REB']


In [8]:
# Make predictions for our player
print(f"\n{'='*50}")
print(f"  {PLAYER} - Next Game Predictions")
print(f"{'='*50}\n")

for stat, predictor in predictors.items():
    try:
        result = predictor.predict_player_game(PLAYER, df)
        if 'error' not in result:
            pred_key = f'predicted_{stat.lower()}'
            prediction = result.get(pred_key, 'N/A')
            
            # Get recent average for comparison
            recent_avg = player_current[stat].tail(5).mean()
            
            print(f"  {stat}:")
            print(f"    Prediction: {prediction:.1f}")
            print(f"    Last 5 Avg: {recent_avg:.1f}")
            print()
    except Exception as e:
        print(f"  {stat}: Error - {e}")


  LeBron James - Next Game Predictions

  PTS:
    Prediction: 26.7
    Last 5 Avg: 23.0

  AST:
    Prediction: 2.5
    Last 5 Avg: 5.8

  REB:
    Prediction: 4.5
    Last 5 Avg: 4.2



## Compare Multiple Players

In [9]:
# Compare top players
PLAYERS_TO_COMPARE = ["LeBron James", "Stephen Curry", "Kevin Durant", "Giannis Antetokounmpo", "Luka Doncic"]

comparison_data = []
for player in PLAYERS_TO_COMPARE:
    player_season = current_season[current_season['PLAYER_NAME'] == player]
    if len(player_season) >= 10:
        comparison_data.append({
            'Player': player,
            'PPG': player_season['PTS'].mean(),
            'APG': player_season['AST'].mean(),
            'RPG': player_season['REB'].mean(),
            'Games': len(player_season)
        })

comparison_df = pd.DataFrame(comparison_data).round(1)
comparison_df

Unnamed: 0,Player,PPG,APG,RPG,Games
0,LeBron James,24.4,8.2,7.8,70
1,Stephen Curry,24.5,6.0,4.4,70
2,Kevin Durant,26.6,4.2,6.0,62
3,Giannis Antetokounmpo,30.4,6.5,11.9,67


In [10]:
# Radar chart comparison
fig = go.Figure()

categories = ['PPG', 'APG', 'RPG']
colors = ['#14b8a6', '#f97066', '#a78bfa', '#60a5fa', '#fbbf24']

for i, row in comparison_df.iterrows():
    # Normalize values for radar chart (0-1 scale based on max in category)
    values = [row['PPG'], row['APG'], row['RPG']]
    
    fig.add_trace(go.Scatterpolar(
        r=values + [values[0]],  # Close the polygon
        theta=categories + [categories[0]],
        name=row['Player'].split()[-1],  # Last name only
        line=dict(color=colors[i % len(colors)], width=2),
        fill='toself',
        opacity=0.6
    ))

fig.update_layout(
    polar=dict(
        radialaxis=dict(visible=True, range=[0, 35]),
        bgcolor='rgba(0,0,0,0)'
    ),
    title='Player Comparison - 2024-25 Season',
    template='plotly_dark',
    height=500,
    showlegend=True
)
fig.show()

## Home vs Away Performance

In [11]:
# Analyze home vs away for selected player
player_current['is_home'] = player_current['MATCHUP'].str.contains('vs.')

home_away = player_current.groupby('is_home').agg({
    'PTS': 'mean',
    'AST': 'mean',
    'REB': 'mean',
    'GAME_DATE': 'count'
}).rename(columns={'GAME_DATE': 'Games'}).round(1)

home_away.index = ['Away', 'Home']

fig = make_subplots(rows=1, cols=3, subplot_titles=['Points', 'Assists', 'Rebounds'])

for i, stat in enumerate(['PTS', 'AST', 'REB']):
    colors_ha = ['#f97066', '#14b8a6']  # Away=coral, Home=teal
    fig.add_trace(
        go.Bar(
            x=home_away.index,
            y=home_away[stat],
            marker_color=colors_ha,
            text=home_away[stat],
            textposition='outside',
            showlegend=False
        ),
        row=1, col=i+1
    )

fig.update_layout(
    title=f"{PLAYER} - Home vs Away (2024-25)",
    template='plotly_dark',
    height=350
)
fig.show()

print(f"\nHome games: {home_away.loc['Home', 'Games']:.0f} | Away games: {home_away.loc['Away', 'Games']:.0f}")



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy




Home games: 34 | Away games: 36


## Season Trends - All Star Players

In [12]:
# Monthly scoring trends for top players
top_players = ['LeBron James', 'Stephen Curry', 'Kevin Durant', 'Luka Doncic', 'Jayson Tatum']

fig = go.Figure()

for i, player in enumerate(top_players):
    player_data = current_season[current_season['PLAYER_NAME'] == player].copy()
    if len(player_data) > 0:
        player_data['month'] = player_data['_date'].dt.to_period('M')
        monthly = player_data.groupby('month')['PTS'].mean()
        
        fig.add_trace(go.Scatter(
            x=[str(m) for m in monthly.index],
            y=monthly.values,
            mode='lines+markers',
            name=player.split()[-1],
            line=dict(width=3)
        ))

fig.update_layout(
    title='Monthly Scoring Average - Top Players',
    xaxis_title='Month',
    yaxis_title='PPG',
    template='plotly_dark',
    height=400,
    legend=dict(orientation='h', yanchor='bottom', y=1.02)
)
fig.show()

## Model Performance Summary

In [13]:
# Display model metrics
print("\n" + "="*60)
print("  XGBoost Model Performance")
print("="*60)

model_metrics = {
    'Points (PTS)': {'MAE': 4.67, 'Features': 77},
    'Assists (AST)': {'MAE': 1.25, 'Features': 77},
    'Rebounds (REB)': {'MAE': 2.22, 'Features': 77}
}

for model, metrics in model_metrics.items():
    print(f"\n  {model}:")
    print(f"    Mean Absolute Error: {metrics['MAE']:.2f}")
    print(f"    Features Used: {metrics['Features']}")

print("\n" + "="*60)
print("\n  Key Features:")
print("  - Rolling averages (5, 10, 20 games)")
print("  - Home/Away indicator")
print("  - Rest days")
print("  - Opponent defensive rating")
print("  - Minutes trend")
print("  - Season averages")
print("  - Position matchup data")


  XGBoost Model Performance

  Points (PTS):
    Mean Absolute Error: 4.67
    Features Used: 77

  Assists (AST):
    Mean Absolute Error: 1.25
    Features Used: 77

  Rebounds (REB):
    Mean Absolute Error: 2.22
    Features Used: 77


  Key Features:
  - Rolling averages (5, 10, 20 games)
  - Home/Away indicator
  - Rest days
  - Opponent defensive rating
  - Minutes trend
  - Season averages
  - Position matchup data


---

## Try the Live Dashboard

For interactive predictions and real-time data:

**[Launch NBA Player Props Dashboard](https://huggingface.co/spaces/kingzman20/nba-player-predictor)**

---

*Built with Python, XGBoost, Plotly, and Dash*