In [3]:
import numpy as np
import pandas as pd
import time
import datetime as datetime
from nba_api.stats.endpoints import leaguedashteamstats
from nba_api.stats.endpoints import leaguedashplayerstats

In [45]:
#Team stats
team_stats = leaguedashteamstats.LeagueDashTeamStats(
    season='2025-26',          
    season_type_all_star='Regular Season'
)

#Convert the data to a pandas DataFrame
df = team_stats.get_data_frames()[0]

# Remove TEAM_ID
df = df.drop(columns=['TEAM_ID'], errors='ignore')

# Save the data to a CSV file
print(df.to_csv('team_stats.csv', index=False))


None


In [46]:
#Per game player staticstics
#scraping for points, rebounds, assists, 3 pointers made,steals, blocks, PR, PA, RA, and PRA

print("Fetching player per game stats for 2025-2026 season")
stats = leaguedashplayerstats.LeagueDashPlayerStats(
    season='2025-26',
    season_type_all_star='Regular Season',
    measure_type_detailed_defense='Base',
    per_mode_detailed='PerGame'
)

df = stats.get_data_frames()[0]

#set up for PR, PA, RA, and PRA
df['PTS+REB'] = df['PTS'] + df['REB']
df['PTS+AST'] = df['PTS'] + df['AST']
df['REB+AST'] = df['REB'] + df['AST']
df['PTS+REB+AST'] = df['PTS'] + df['REB'] + df['AST']

#filter for columns
target_columns = [
    'PLAYER_NAME', 
    'TEAM_ABBREVIATION',
    'GP',             
    'MIN',            
    'PTS', 
    'AST', 
    'REB', 
    'FG3M',           
    'STL', 
    'BLK', 
    'PTS+REB', 
    'PTS+AST', 
    'REB+AST', 
    'PTS+REB+AST'
]

final_df = df[target_columns]

#rounding to 1 decimal place
numeric_cols = ['PTS', 'AST', 'REB', 'FG3M', 'STL', 'BLK', 'PTS+REB', 'PTS+AST', 'REB+AST', 'PTS+REB+AST']
final_df[numeric_cols] = final_df[numeric_cols].round(1)

pd.set_option('display.max_columns', None)
pd.set_option('display.width', 1000)

final_df = final_df.sort_values(by='PTS+REB+AST', ascending=False).reset_index(drop=True)

print(final_df.to_csv('NBA_player_stats_25-26.csv', index=False))



Fetching player per game stats for 2025-2026 season
None


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  final_df[numeric_cols] = final_df[numeric_cols].round(1)


In [47]:
#past 10 games stats for teams
print("Fetching past 10 games stats for teams")
team_stats = leaguedashteamstats.LeagueDashTeamStats(
    season='2025-26',
    season_type_all_star='Regular Season',
    last_n_games=5,
    per_mode_detailed='PerGame'
)

df_team_stats = team_stats.get_data_frames()[0]

# Remove TEAM_ID
df_team_stats = df_team_stats.drop(columns=['TEAM_ID'], errors='ignore')

print(df_team_stats.to_csv('NBA_team_stats_25-26_last_5.csv', index=False))

Fetching past 10 games stats for teams
None


In [48]:
#playes last 10 game stats
print("Fetching player per game stats for 2025-2026 season")
stats = leaguedashplayerstats.LeagueDashPlayerStats(
    season='2025-26',
    season_type_all_star='Regular Season',
    measure_type_detailed_defense='Base',
    last_n_games=5,
    per_mode_detailed='PerGame'
)

df = stats.get_data_frames()[0]

#set up for PR, PA, RA, and PRA
df['PTS+REB'] = df['PTS'] + df['REB']
df['PTS+AST'] = df['PTS'] + df['AST']
df['REB+AST'] = df['REB'] + df['AST']
df['PTS+REB+AST'] = df['PTS'] + df['REB'] + df['AST']

#filter for columns
target_columns = [
    'PLAYER_NAME', 
    'TEAM_ABBREVIATION',
    'GP',             
    'MIN',            
    'PTS', 
    'AST', 
    'REB', 
    'FG3M',           
    'STL', 
    'BLK', 
    'PTS+REB', 
    'PTS+AST', 
    'REB+AST', 
    'PTS+REB+AST'
]

final_df = df[target_columns]

#rounding to 1 decimal place
numeric_cols = ['PTS', 'AST', 'REB', 'FG3M', 'STL', 'BLK', 'PTS+REB', 'PTS+AST', 'REB+AST', 'PTS+REB+AST']
final_df[numeric_cols] = final_df[numeric_cols].round(1)

pd.set_option('display.max_columns', None)
pd.set_option('display.width', 1000)

final_df = final_df.sort_values(by='PTS+REB+AST', ascending=False).reset_index(drop=True)

print(final_df.to_csv('NBA_player_stats_25-26_last_5.csv', index=False))



Fetching player per game stats for 2025-2026 season
None


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  final_df[numeric_cols] = final_df[numeric_cols].round(1)


In [6]:
#Scraping future games for the 2025-26 season

# List of months remaining in the 2025-26 season
months = ['january', 'february', 'march', 'april']
season_year = 2026
all_games = []

print("Fetching 2025-26 NBA Schedule...")

for month in months:
    url = f"https://www.basketball-reference.com/leagues/NBA_{season_year}_games-{month}.html"
    try:
        # Read the tables from the page
        tables = pd.read_html(url)
        # The first table is usually the schedule
        df = tables[0]
        
        # Rename columns for clarity (Standardizing Headers)
        df.rename(columns={
            'Date': 'Date',
            'Start (ET)': 'Time (ET)',
            'Visitor/Neutral': 'Away Team',
            'Home/Neutral': 'Home Team',
            'Arena': 'Venue'
        }, inplace=True)
        
        # Keep only relevant columns
        cols_to_keep = ['Date', 'Time (ET)', 'Away Team', 'Home Team', 'Venue']
        # Check if columns exist before selecting
        df = df[[c for c in cols_to_keep if c in df.columns]]
        
        # Filter out "Playoffs" headers or empty rows if any exist in the raw data
        df = df[df['Date'] != 'Date']
        
        all_games.append(df)
        print(f"Processed {month.capitalize()}...")
        time.sleep(2) # Be polite to the server
        
    except Exception as e:
        print(f"Could not retrieve data for {month}: {e}")

# Combine all months
full_schedule = pd.concat(all_games, ignore_index=True)

# Filter for future games only (From Jan 27, 2026 onwards)
# Convert date column to datetime objects
full_schedule['Date_Obj'] = pd.to_datetime(full_schedule['Date'])
cutoff_date = pd.Timestamp("2026-01-27")
future_games = full_schedule[full_schedule['Date_Obj'] >= cutoff_date].copy()

# Sort by date
future_games.sort_values(by='Date_Obj', inplace=True)

# Drop helper column
future_games.drop(columns=['Date_Obj'], inplace=True)

# Save to CSV
filename = 'nba_schedule_2026.csv'
future_games.to_csv(filename, index=False)

print(f"Success! Saved {len(future_games)} games to {filename}")

Fetching 2025-26 NBA Schedule...
Processed January...
Processed February...
Processed March...
Processed April...
Success! Saved 538 games to nba_schedule_2026.csv
