#### In this notebook, we scrape and retrieve data from the official NBA stats site.

All the pages are from this site: https://www.nba.com/stats


We will start by retrieving a single page of data, and then automatically retrieve multiple pages of the same "category".

First, I will retrieve data from this page: https://www.nba.com/stats/teams/isolation

API call: https://stats.nba.com/stats/synergyplaytypes?LeagueID=00&PerMode=PerGame&PlayType=Isolation&PlayerOrTeam=T&SeasonType=Regular%20Season&SeasonYear=2024-25&TypeGrouping=offensive

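From the API call URL above, we can observe several query parameters that are important for directing the request to this specific page: `LeagueID`, `PerMode`, `PlayType`, `PlayerOrTeam`, `SeasonType`, `SeasonYear` and `TypeGrouping`.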

In [6]:
import requests
import pandas as pd
import time

url = "https://stats.nba.com/stats/synergyplaytypes"  # base API URL

# Query parameters shared by every request. PlayType and SeasonType vary per
# request and are filled in by fetch_playtype_table().
base_params = {
    "LeagueID": "00",
    "PerMode": "PerGame",
    "PlayerOrTeam": "T",
    "SeasonYear": "2024-25",
    "TypeGrouping": "offensive",
}

playtype = ["Isolation", "Transition", "PRBallHandler", "PRRollman", "Postup", "Spotup",
            "Handoff", "Cut", "OffScreen", "OffRebound"]

seasontype = ["Playoffs", "Regular Season"]

headers = {
    "User-Agent": "Mozilla/5.0",  # spelling corrected (previously "Mozzila/5.0")
    "Referer": "https://www.nba.com/",
}

# Without an explicit timeout, requests can wait on a stalled connection forever.
REQUEST_TIMEOUT_SECONDS = 30
# Pause between requests to reduce the chance of being rate-limited or blocked.
REQUEST_PAUSE_SECONDS = 1


def fetch_playtype_table(play_type, season_type):
    """Fetch one play-type table from the API and return it as a DataFrame.

    Parameters
    ----------
    play_type : str
        Value sent as the ``PlayType`` query parameter.
    season_type : str
        Value sent as the ``SeasonType`` query parameter.

    Returns
    -------
    pandas.DataFrame or None
        The first table of the response's ``resultSets`` list, with columns
        taken from its ``headers`` entry and rows from its ``rowSet`` entry.
        ``None`` when the request fails, the status is not 200, or the payload
        does not have that layout; a message describing the failure is printed
        so the remaining requests can still run.
    """
    params = base_params.copy()
    params["PlayType"] = play_type
    params["SeasonType"] = season_type
    label = f"{season_type} / {play_type}"

    try:
        response = requests.get(
            url, headers=headers, params=params, timeout=REQUEST_TIMEOUT_SECONDS
        )
    except requests.RequestException as exc:
        print(f"Request failed for {label}: {exc}")
        return None

    if response.status_code != 200:
        print(f"HTTP request failed for {label} (status {response.status_code})")
        return None

    try:
        # data["resultSets"] is a list of tables; [0] selects the first one.
        table = response.json()["resultSets"][0]
        return pd.DataFrame(table["rowSet"], columns=table["headers"])
    except (ValueError, KeyError, IndexError, TypeError) as exc:
        print(f"Unexpected response layout for {label}: {exc!r}")
        return None


def combine_and_save(frames, csv_path, label):
    """Stack ``frames`` row-wise, write the result to ``csv_path`` and return it.

    ``ignore_index=True`` renumbers the rows continuously from 0. When
    ``frames`` is empty, nothing is written (pd.concat would raise on an empty
    list) and an empty DataFrame is returned instead.
    """
    if not frames:
        print(f"No {label} data was retrieved; {csv_path} was not written")
        return pd.DataFrame()
    combined = pd.concat(frames, ignore_index=True)
    combined.to_csv(csv_path, index=False)
    return combined


# Each list collects one DataFrame per successfully fetched play type.
playoffs_data = []
reg_szn_data = []

for season_type in seasontype:
    for play_type in playtype:
        df = fetch_playtype_table(play_type, season_type)
        if df is not None:
            if season_type == "Playoffs":
                playoffs_data.append(df)
            else:
                reg_szn_data.append(df)

        time.sleep(REQUEST_PAUSE_SECONDS)

playoffs_df = combine_and_save(
    playoffs_data, "nba_playoffs_playtype_2024-2025.csv", "playoffs"
)
reg_szn_df = combine_and_save(
    reg_szn_data, "nba_regular_season_playtype_2024-2025.csv", "regular season"
)












