# Scraping Team Data

In this notebook we will scrape data on the teams in the NBA. To do this we will use the `teamyearbyyearstats` endpoint, which allows us to collect stats for every team across the years.

In [9]:
import pandas as pd
from nba_api.stats.static import teams
from nba_api.stats.endpoints import teamyearbyyearstats
import time
import os
from loguru import logger
from IPython.display import clear_output

Retrieve the list of all teams to get their IDs, which are later passed to the function that scrapes the stats.

In [2]:
# Load the static team list from nba_api; each entry is a dict whose 'id'
# and 'full_name' keys are used by the scraping cells further down.
all_teams = teams.get_teams()
# Bare last expression so the notebook renders the list as the cell output.
all_teams

[{'id': 1610612737,
  'full_name': 'Atlanta Hawks',
  'abbreviation': 'ATL',
  'nickname': 'Hawks',
  'city': 'Atlanta',
  'state': 'Georgia',
  'year_founded': 1949},
 {'id': 1610612738,
  'full_name': 'Boston Celtics',
  'abbreviation': 'BOS',
  'nickname': 'Celtics',
  'city': 'Boston',
  'state': 'Massachusetts',
  'year_founded': 1946},
 {'id': 1610612739,
  'full_name': 'Cleveland Cavaliers',
  'abbreviation': 'CLE',
  'nickname': 'Cavaliers',
  'city': 'Cleveland',
  'state': 'Ohio',
  'year_founded': 1970},
 {'id': 1610612740,
  'full_name': 'New Orleans Pelicans',
  'abbreviation': 'NOP',
  'nickname': 'Pelicans',
  'city': 'New Orleans',
  'state': 'Louisiana',
  'year_founded': 2002},
 {'id': 1610612741,
  'full_name': 'Chicago Bulls',
  'abbreviation': 'CHI',
  'nickname': 'Bulls',
  'city': 'Chicago',
  'state': 'Illinois',
  'year_founded': 1966},
 {'id': 1610612742,
  'full_name': 'Dallas Mavericks',
  'abbreviation': 'DAL',
  'nickname': 'Mavericks',
  'city': 'Dallas',

In [56]:
# Scrape one team's year-by-year stats and persist them as a CSV file.
def fetch_and_save_team_data(team_id, team_name, season, output_dir=None, pause_seconds=1):
    """Download a team's year-by-year stats and save them as a CSV file.

    The file is written to ``<output_dir>/<team_name>_<team_id>.csv`` without
    the DataFrame index.

    Parameters
    ----------
    team_id : int or str
        Team identifier; converted to ``str`` before being sent to the
        ``TeamYearByYearStats`` endpoint.
    team_name : str
        Used in the log messages and as the first part of the file name.
    season : str
        Forwarded to the endpoint as ``season_type_all_star``
        (e.g. ``"Regular Season"``).
    output_dir : str, optional
        Directory that receives the CSV; created if missing. Defaults to
        ``../data/raw/teams_regSeason_data``. The default does NOT depend on
        ``season``, so pass an explicit directory when scraping any other
        season type -- otherwise the files of a previous run are overwritten.
    pause_seconds : float, optional
        Seconds to sleep after saving (default 1). Presumably this spaces out
        consecutive requests to the stats API; pass 0 to skip the pause.

    Returns
    -------
    None
    """
    if output_dir is None:
        output_dir = os.path.join("..", "data", "raw", "teams_regSeason_data")

    logger.info(f"Fetching data for team: {team_name} (ID: {team_id})")

    team_stats = teamyearbyyearstats.TeamYearByYearStats(
        team_id=str(team_id),
        season_type_all_star=season,
    )
    # Only the first data frame returned by the endpoint is persisted.
    team_stats_df = team_stats.get_data_frames()[0]

    os.makedirs(output_dir, exist_ok=True)
    output_filename = os.path.join(output_dir, f"{team_name}_{team_id}.csv")
    team_stats_df.to_csv(output_filename, index=False)
    logger.info(f"Data saved to: {output_filename}")

    # wait=True defers the clear until the next output arrives, so the
    # "Data saved" message stays readable instead of being wiped immediately,
    # while the cell output still never grows beyond one team's messages.
    clear_output(wait=True)

    if pause_seconds:
        time.sleep(pause_seconds)

In [55]:
# Work on a copy of the team list and order that copy by ascending team id,
# leaving all_teams itself untouched.
sorted_teams = list(all_teams)
sorted_teams.sort(key=lambda team_info: team_info['id'])

In [53]:
# Scrape and save the regular-season history of every team, in id order.
for team in sorted_teams:
    team_id, team_name = team['id'], team['full_name']
    fetch_and_save_team_data(team_id, team_name, "Regular Season")

In [3]:
# Directory holding one "<team name>_<team id>.csv" file per team, i.e. the
# default location that fetch_and_save_team_data writes to.
team_data_dir = "../data/raw/teams_regSeason_data"

team_data_frames = []

# os.listdir returns entries in arbitrary order; sorting makes the row order
# of the combined frame identical on every run and every machine.
for filename in sorted(os.listdir(team_data_dir)):
    if not filename.endswith(".csv"):
        continue
    file_path = os.path.join(team_data_dir, filename)

    # The file name is "<team name>_<team id>.csv": split on the LAST
    # underscore only, so a team name containing "_" is kept whole.
    team_name = filename.rsplit("_", 1)[0]
    df = pd.read_csv(file_path)
    df['team_name'] = team_name
    team_data_frames.append(df)

# pd.concat on an empty list fails with an opaque "No objects to concatenate";
# raise the same exception type with a message that says what to do.
if not team_data_frames:
    raise ValueError(
        f"No team CSV files found in {team_data_dir!r}; run the scraping cell first."
    )

team_data_combined = pd.concat(team_data_frames, ignore_index=True)

In [4]:
# Display the combined frame: one row per team and season (see the YEAR
# column), with the added team_name column at the end.
team_data_combined

Unnamed: 0,TEAM_ID,TEAM_CITY,TEAM_NAME,YEAR,GP,WINS,LOSSES,WIN_PCT,CONF_RANK,DIV_RANK,...,DREB,REB,AST,PF,STL,TOV,BLK,PTS,PTS_RANK,team_name
0,1610612737,Tri-Cities,Blackhawks,1949-50,64,29,35,0.453,0,3,...,0,0,1330,2057,0,0,0,5313,10,Atlanta Hawks
1,1610612737,Tri-Cities,Blackhawks,1950-51,68,25,43,0.368,0,5,...,0,0,1476,2092,0,0,0,5730,3,Atlanta Hawks
2,1610612737,Milwaukee,Hawks,1951-52,66,17,49,0.258,0,5,...,0,0,1229,1848,0,0,0,4833,10,Atlanta Hawks
3,1610612737,Milwaukee,Hawks,1952-53,71,27,44,0.380,0,5,...,0,0,1427,2120,0,0,0,5389,9,Atlanta Hawks
4,1610612737,Milwaukee,Hawks,1953-54,72,21,51,0.292,0,4,...,0,0,1298,1771,0,0,0,5038,9,Atlanta Hawks
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1622,1610612764,Washington,Wizards,2019-20,72,25,47,0.347,9,3,...,2295,3027,1801,1634,574,1020,309,8238,8,Washington Wizards
1623,1610612764,Washington,Wizards,2020-21,72,34,38,0.472,0,0,...,2557,3254,1835,1555,528,1036,297,8398,3,Washington Wizards
1624,1610612764,Washington,Wizards,2021-22,82,35,47,0.427,12,4,...,2798,3535,2052,1545,522,1077,406,8907,22,Washington Wizards
1625,1610612764,Washington,Wizards,2022-23,82,35,47,0.427,12,3,...,2804,3578,2083,1539,561,1158,424,9279,21,Washington Wizards


In [5]:
# List every column label of the combined frame, since the rendered table
# above elides the middle columns.
team_data_combined.columns

Index(['TEAM_ID', 'TEAM_CITY', 'TEAM_NAME', 'YEAR', 'GP', 'WINS', 'LOSSES',
       'WIN_PCT', 'CONF_RANK', 'DIV_RANK', 'PO_WINS', 'PO_LOSSES',
       'CONF_COUNT', 'DIV_COUNT', 'NBA_FINALS_APPEARANCE', 'FGM', 'FGA',
       'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 'FTM', 'FTA', 'FT_PCT', 'OREB',
       'DREB', 'REB', 'AST', 'PF', 'STL', 'TOV', 'BLK', 'PTS', 'PTS_RANK',
       'team_name'],
      dtype='object')