In [39]:
import pandas as pd
import requests 
import time

In [43]:
def scrape_nba_data(start_season, end_season, stat_category='PTS', season_type='Regular Season',
                    timeout=30, delay=2):
    """Download per-game league-leader tables from stats.nba.com for a range of seasons.

    One request is issued per season to the ``leagueLeaders`` endpoint. Each
    successful response becomes a DataFrame whose columns are the ``headers``
    list of the returned ``resultSet`` plus an added ``Season`` column; all
    seasons are then concatenated.

    The function is best-effort: a season whose request fails, or whose
    payload does not have the expected ``resultSet`` structure, is reported
    with ``print`` and skipped so the remaining seasons are still collected.

    Args:
        start_season: First season's starting year (e.g. 2018 for "2018-19").
        end_season: Last season's starting year, inclusive.
        stat_category: Value sent as the ``StatCategory`` query parameter.
        season_type: Value sent as the ``SeasonType`` query parameter.
        timeout: Seconds passed to ``requests.get`` so a stalled connection
            cannot hang the notebook indefinitely.
        delay: Seconds to wait between consecutive requests.

    Returns:
        A single DataFrame with the rows of every season that was fetched
        successfully, or an empty DataFrame if none were (including when
        ``start_season > end_season``).
    """
    base_url = "https://stats.nba.com/stats/leagueLeaders"

    # Loop-invariant, so built once. Browser-like headers; presumably the
    # endpoint rejects requests without them -- TODO confirm.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
        'Referer': 'https://www.nba.com/',
        'x-nba-stats-origin': 'stats',
        'x-nba-stats-token': 'true'
    }

    season_frames = []
    is_first_request = True

    for year in range(start_season, end_season + 1):
        # Throttle *between* requests. Doing it here (rather than at the end
        # of the loop body) means a failed season is still followed by a
        # pause, and no time is wasted sleeping after the final season.
        if not is_first_request:
            time.sleep(delay)
        is_first_request = False

        # Season label in the "YYYY-YY" form, e.g. 2018 -> "2018-19".
        season = f"{year}-{str(year + 1)[-2:]}"

        # Let requests build and escape the query string; values such as
        # 'Regular Season' contain characters that must be URL-encoded.
        params = {
            'LeagueID': '00',
            'PerMode': 'PerGame',
            'Scope': 'S',
            'Season': season,
            'SeasonType': season_type,
            'StatCategory': stat_category,
        }

        try:
            response = requests.get(base_url, params=params, headers=headers, timeout=timeout)
            response.raise_for_status()
            # Parse the body once and reuse it for both rows and column names.
            result_set = response.json()['resultSet']
            season_df = pd.DataFrame(result_set['rowSet'], columns=result_set['headers'])
        except requests.exceptions.RequestException as e:
            print(f"Failed to fetch data for season {season}: {e}")
            continue
        except (KeyError, TypeError, ValueError) as e:
            # The HTTP call succeeded but the payload is not shaped as
            # expected; skip this season instead of discarding the others.
            print(f"Unexpected response format for season {season}: {e!r}")
            continue

        season_df['Season'] = season
        season_frames.append(season_df)

    # Combine all seasons into a single DataFrame (empty if nothing was fetched).
    if not season_frames:
        return pd.DataFrame()
    return pd.concat(season_frames, ignore_index=True)

In [42]:
# Demo run: fetch the seasons starting in 2018 through 2023 and preview the result.
nba_stats = scrape_nba_data(2018, 2023)
if nba_stats.empty:
    # Every season failed (or the range was empty), so there is nothing to preview.
    print("No data was retrieved.")
else:
    # Report how many distinct seasons made it into the combined frame.
    print(f"Data scraped for {nba_stats['Season'].nunique()} seasons.")
    print(nba_stats.head())

Data scraped for 6 seasons.
   PLAYER_ID  RANK                 PLAYER     TEAM_ID TEAM  GP   MIN   FGM  \
0     201935     1           James Harden  1610612745  HOU  78  36.8  10.8   
1     202331     2            Paul George  1610612760  OKC  77  36.9   9.2   
2     203507     3  Giannis Antetokounmpo  1610612749  MIL  72  32.8  10.0   
3     203954     4            Joel Embiid  1610612755  PHI  64  33.7   9.1   
4     201939     5          Stephen Curry  1610612744  GSW  69  33.8   9.2   

    FGA  FG_PCT  ...  OREB  DREB   REB  AST  STL  BLK  TOV   PTS   EFF  \
0  24.5   0.442  ...   0.8   5.8   6.6  7.5  2.0  0.7  5.0  36.1  33.1   
1  21.0   0.438  ...   1.4   6.8   8.2  4.1  2.2  0.4  2.7  28.0  27.4   
2  17.3   0.578  ...   2.2  10.3  12.5  5.9  1.3  1.5  3.7  27.7  35.3   
3  18.7   0.484  ...   2.5  11.1  13.6  3.7  0.7  1.9  3.5  27.5  32.2   
4  19.4   0.472  ...   0.7   4.7   5.3  5.2  1.3  0.4  2.8  27.3  26.1   

    Season  
0  2018-19  
1  2018-19  
2  2018-19  
3  201

In [44]:
# Number of rows per player across the combined seasons.
print(nba_stats.loc[:, 'PLAYER'].value_counts())

PLAYER
Kyle Kuzma         6
DeMar DeRozan      6
Jrue Holiday       6
Jayson Tatum       6
Rudy Gobert        6
                  ..
Darren Collison    1
Dwayne Bacon       1
Elfrid Payton      1
Theo Maledon       1
Dalen Terry        1
Name: count, Length: 539, dtype: int64


In [45]:
# Missing-value count for each column of the combined frame.
print(nba_stats.isna().sum())

PLAYER_ID    0
RANK         0
PLAYER       0
TEAM_ID      0
TEAM         0
GP           0
MIN          0
FGM          0
FGA          0
FG_PCT       0
FG3M         0
FG3A         0
FG3_PCT      0
FTM          0
FTA          0
FT_PCT       0
OREB         0
DREB         0
REB          0
AST          0
STL          0
BLK          0
TOV          0
PTS          0
EFF          0
Season       0
dtype: int64
