# `nba_api`: Team Schedule Investigation

Trying to determine which endpoint is best for getting a team's schedule for an entire season.

## Global Imports 

In [1]:
import pandas as pd
import nba_api as nba

## Team Information

In [4]:
# Import team data
from nba_api.stats.static import teams
nba_teams = teams.get_teams()

# Iterate through the teams
# Print ID and full team name
for tm in nba_teams:    
    team_id = tm['id']
    team_name = tm['full_name']
    print(f'{team_id}\t{team_name}')

1610612737	Atlanta Hawks
1610612738	Boston Celtics
1610612739	Cleveland Cavaliers
1610612740	New Orleans Pelicans
1610612741	Chicago Bulls
1610612742	Dallas Mavericks
1610612743	Denver Nuggets
1610612744	Golden State Warriors
1610612745	Houston Rockets
1610612746	Los Angeles Clippers
1610612747	Los Angeles Lakers
1610612748	Miami Heat
1610612749	Milwaukee Bucks
1610612750	Minnesota Timberwolves
1610612751	Brooklyn Nets
1610612752	New York Knicks
1610612753	Orlando Magic
1610612754	Indiana Pacers
1610612755	Philadelphia 76ers
1610612756	Phoenix Suns
1610612757	Portland Trail Blazers
1610612758	Sacramento Kings
1610612759	San Antonio Spurs
1610612760	Oklahoma City Thunder
1610612761	Toronto Raptors
1610612762	Utah Jazz
1610612763	Memphis Grizzlies
1610612764	Washington Wizards
1610612765	Detroit Pistons
1610612766	Charlotte Hornets


## TeamGameLog

In [5]:
# Single Team Game Log Preview
from nba_api.stats.endpoints import teamgamelog
sixers_game_log = teamgamelog.TeamGameLog(season_all='2018-19', season_type_all_star='Regular Season', team_id=1610612755)

In [12]:
sixers_game_log.get_data_frames()[0].sort_values('Game_ID').head()

Unnamed: 0,Team_ID,Game_ID,GAME_DATE,MATCHUP,WL,W,L,W_PCT,MIN,FGM,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS
47,1610612755,21800001,"OCT 16, 2018",PHI @ BOS,L,0,1,0.0,240,34,...,0.609,6,41,47,18,8,5,16,20,87
46,1610612755,21800014,"OCT 18, 2018",PHI vs. CHI,W,1,1,0.5,240,44,...,0.9,11,44,55,30,10,8,13,17,127
45,1610612755,21800029,"OCT 20, 2018",PHI vs. ORL,W,2,1,0.667,240,45,...,0.9,13,33,46,33,8,3,10,21,116
44,1610612755,21800049,"OCT 23, 2018",PHI @ DET,L,2,2,0.5,265,49,...,0.714,8,40,48,32,3,8,11,24,132
43,1610612755,21800059,"OCT 24, 2018",PHI @ MIL,L,2,3,0.4,240,38,...,0.7,8,42,50,29,4,5,16,27,108


## TeamInfoCommon

In [14]:
from nba_api.stats.endpoints import teaminfocommon
sixers_common_info = teaminfocommon.TeamInfoCommon(league_id='00', team_id=1610612755, season_type_nullable='Regular Season', season_nullable='2018-19')

In [15]:
sixers_common_info.get_data_frames()[0].head()

Unnamed: 0,TEAM_ID,SEASON_YEAR,TEAM_CITY,TEAM_NAME,TEAM_ABBREVIATION,TEAM_CONFERENCE,TEAM_DIVISION,TEAM_CODE,W,L,PCT,CONF_RANK,DIV_RANK,MIN_YEAR,MAX_YEAR
0,1610612755,2018-19,Philadelphia,76ers,PHI,East,Atlantic,sixers,31,17,0.646,4,2,1949,2018


## ScoreboardV2

In [16]:
from nba_api.stats.endpoints import scoreboardv2
jan22_scoreboard = scoreboardv2.ScoreboardV2(league_id='00', game_date='2019-01-22', day_offset=0)

In [29]:
schedule_dict = jan22_scoreboard.get_dict()['resultSets'][0]

In [30]:
schedule_df = pd.DataFrame(data=schedule_dict['rowSet'], columns=schedule_dict['headers'])

In [31]:
schedule_df.head()

Unnamed: 0,GAME_DATE_EST,GAME_SEQUENCE,GAME_ID,GAME_STATUS_ID,GAME_STATUS_TEXT,GAMECODE,HOME_TEAM_ID,VISITOR_TEAM_ID,SEASON,LIVE_PERIOD,LIVE_PC_TIME,NATL_TV_BROADCASTER_ABBREVIATION,HOME_TV_BROADCASTER_ABBREVIATION,AWAY_TV_BROADCASTER_ABBREVIATION,LIVE_PERIOD_TIME_BCAST,ARENA_NAME,WH_STATUS
0,2019-01-22T00:00:00,1,21800702,2,4th Qtr,20190122/SACTOR,1610612761,1610612758,2018,4,2:28,,TSN,NBCSCA,Q4 2:28 -,Scotiabank Arena,0
1,2019-01-22T00:00:00,2,21800703,2,3rd Qtr,20190122/POROKC,1610612760,1610612757,2018,3,11:50,NBA TV,FSOK,NBCSNW/BP,Q3 11:50 - NBA TV,Chesapeake Energy Arena,0
2,2019-01-22T00:00:00,3,21800704,2,2nd Qtr,20190122/LACDAL,1610612742,1610612746,2018,2,7:05,,FSSW-DAL,FSPT,Q2 7:05 -,American Airlines Center,0
3,2019-01-22T00:00:00,4,21800705,2,1st Qtr,20190122/MINPHX,1610612756,1610612750,2018,1,5:13,,FSA,FSNTH,Q1 5:13 -,Talking Stick Resort Arena,0


In [33]:
future_scoreboard = scoreboardv2.ScoreboardV2(league_id='00', game_date='2019-01-29', day_offset=0)
future_dict = future_scoreboard.get_dict()['resultSets'][0]
future_df = pd.DataFrame(data=future_dict['rowSet'], columns=future_dict['headers'])
future_df.head()

Unnamed: 0,GAME_DATE_EST,GAME_SEQUENCE,GAME_ID,GAME_STATUS_ID,GAME_STATUS_TEXT,GAMECODE,HOME_TEAM_ID,VISITOR_TEAM_ID,SEASON,LIVE_PERIOD,LIVE_PC_TIME,NATL_TV_BROADCASTER_ABBREVIATION,HOME_TV_BROADCASTER_ABBREVIATION,AWAY_TV_BROADCASTER_ABBREVIATION,LIVE_PERIOD_TIME_BCAST,ARENA_NAME,WH_STATUS
0,2019-01-29T00:00:00,1,21800749,1,7:00 pm ET,20190129/WASCLE,1610612739,1610612764,2018,0,,,FSO,NBCSWA,Q0 -,Quicken Loans Arena,0
1,2019-01-29T00:00:00,2,21800750,1,7:00 pm ET,20190129/MILDET,1610612765,1610612749,2018,0,,,FSD,FSWIS,Q0 -,Little Caesars Arena,0
2,2019-01-29T00:00:00,3,21800751,1,7:00 pm ET,20190129/OKCORL,1610612753,1610612760,2018,0,,,FSFL,FSOK,Q0 -,Amway Center,0
3,2019-01-29T00:00:00,4,21800752,1,7:30 pm ET,20190129/CHIBKN,1610612751,1610612741,2018,0,,,YES,WGN,Q0 -,Barclays Center,0
4,2019-01-29T00:00:00,5,21800753,1,8:00 pm ET,20190129/NOPHOU,1610612745,1610612740,2018,0,,TNT,ATTSN-SW,,Q0 - TNT,Toyota Center,0


## Using ScoreboardV2 to build out entire schedule

Build out date range covering 2018-19 NBA Regular Season

In [38]:
season_dt_range = [dt.strftime('%Y-%m-%d') for dt in pd.date_range('2018-10-16', '2019-04-10')]

In [39]:
season_dt_range[0], season_dt_range[-1]

('2018-10-16', '2019-04-10')

Iterate through date range and pull scoreboard data

In [66]:
# Empty DataFrame to hold daily information
global_scoreboard_df = pd.DataFrame()

# Pared down column list 
column_list = ['GAME_DATE_EST', 'GAME_STATUS_TEXT', 'GAME_ID', 'HOME_TEAM_ID', 'VISITOR_TEAM_ID', 'SEASON', 'ARENA_NAME']

# For each date...
for dt in season_dt_range:
    
    # Pull data from endpoint as dict for given day
    scoreboard_dict = scoreboardv2.ScoreboardV2(league_id='00', game_date=dt, day_offset=0).get_dict()['resultSets'][0]
    # Convert to local DataFrame
    scoreboard_df = pd.DataFrame(data=scoreboard_dict['rowSet'], columns=scoreboard_dict['headers'])
    # Isolate the pared down column list
    scoreboard_df = scoreboard_df.loc[:, column_list]
    # Append to global DataFrame
    global_scoreboard_df = global_scoreboard_df.append(scoreboard_df)

In [67]:
global_scoreboard_df.sample(10)

Unnamed: 0,GAME_DATE_EST,GAME_STATUS_TEXT,GAME_ID,HOME_TEAM_ID,VISITOR_TEAM_ID,SEASON,ARENA_NAME
6,2018-12-26T00:00:00,Final,21800508,1610612763,1610612739,2018,FedExForum
1,2019-02-06T00:00:00,8:00 pm ET,21800805,1610612741,1610612740,2018,United Center
0,2018-10-16T00:00:00,Final,21800001,1610612738,1610612755,2018,TD Garden
0,2019-04-05T00:00:00,7:00 pm ET,21801180,1610612766,1610612761,2018,Spectrum Center
6,2019-04-05T00:00:00,8:00 pm ET,21801186,1610612760,1610612765,2018,Chesapeake Energy Arena
3,2019-02-02T00:00:00,7:00 pm ET,21800778,1610612764,1610612749,2018,Capital One Arena
6,2018-12-08T00:00:00,Final,21800384,1610612763,1610612747,2018,FedExForum
2,2019-01-02T00:00:00,Final,21800556,1610612764,1610612737,2018,Capital One Arena
3,2018-12-19T00:00:00,Final,21800458,1610612738,1610612756,2018,TD Garden
7,2019-03-28T00:00:00,8:30 pm ET,21801129,1610612759,1610612739,2018,AT&T Center


In [68]:
global_scoreboard_df.dtypes

GAME_DATE_EST       object
GAME_STATUS_TEXT    object
GAME_ID             object
HOME_TEAM_ID        object
VISITOR_TEAM_ID     object
SEASON              object
ARENA_NAME          object
dtype: object

In [69]:
global_scoreboard_df['GAME_ID'] = global_scoreboard_df['GAME_ID'].astype('int64')
global_scoreboard_df['HOME_TEAM_ID'] = global_scoreboard_df['HOME_TEAM_ID'].astype('int64')
global_scoreboard_df['VISITOR_TEAM_ID'] = global_scoreboard_df['VISITOR_TEAM_ID'].astype('int64')
global_scoreboard_df['SEASON'] = global_scoreboard_df['SEASON'].astype('int64')

In [70]:
global_scoreboard_df.dtypes

GAME_DATE_EST       object
GAME_STATUS_TEXT    object
GAME_ID              int64
HOME_TEAM_ID         int64
VISITOR_TEAM_ID      int64
SEASON               int64
ARENA_NAME          object
dtype: object

Create DataFrame with team information

**Team Data Structure:**
```
team = {
    'id': team_id,
    'full_name': full_name,
    'abbreviation': abbreviation,
    'nickname': nickname,
    'city': city,
    'state': state,
    'year_founded': year_founded,
}
```

In [71]:
from nba_api.stats.static import teams
team_df = pd.DataFrame(teams.get_teams())
team_df.sample(5)

Unnamed: 0,abbreviation,city,full_name,id,nickname,state,year_founded
14,BKN,Brooklyn,Brooklyn Nets,1610612751,Nets,New York,1976
28,DET,Detroit,Detroit Pistons,1610612765,Pistons,Michigan,1948
22,SAS,San Antonio,San Antonio Spurs,1610612759,Spurs,Texas,1976
25,UTA,Utah,Utah Jazz,1610612762,Jazz,Utah,1974
1,BOS,Boston,Boston Celtics,1610612738,Celtics,Massachusetts,1946


In [72]:
team_df.dtypes

abbreviation    object
city            object
full_name       object
id               int64
nickname        object
state           object
year_founded     int64
dtype: object

Enrich schedule data with home team data

In [73]:
columns = column_list + ['abbreviation', 'full_name', 'city', 'nickname', 'state']
column_map = {'abbreviation':'HOME_TEAM_ABBR', 'full_name':'HOME_TEAM_NAME', 'city':'HOME_TEAM_CITY', 'nickname':'HOME_TEAM_NICKNAME', 'state':'HOME_TEAM_STATE'}
global_scoreboard_df = (global_scoreboard_df.merge(team_df, left_on='HOME_TEAM_ID', right_on='id')
                                            .loc[:, columns]
                                            .rename(columns=column_map))

Enrich schedule data with visitor team data

In [74]:
columns = list(global_scoreboard_df.columns) + ['abbreviation', 'full_name', 'city', 'nickname', 'state']
column_map = {'abbreviation':'VISITOR_TEAM_ABBR', 'full_name':'VISITOR_TEAM_NAME', 'city':'VISITOR_TEAM_CITY', 'nickname':'VISITOR_TEAM_NICKNAME', 'state':'VISITOR_TEAM_STATE'}
global_scoreboard_df = (global_scoreboard_df.merge(team_df, left_on='VISITOR_TEAM_ID', right_on='id')
                                            .loc[:, columns]
                                            .rename(columns=column_map))

Reorder columns

In [75]:
list(global_scoreboard_df.columns)

['GAME_DATE_EST',
 'GAME_STATUS_TEXT',
 'GAME_ID',
 'HOME_TEAM_ID',
 'VISITOR_TEAM_ID',
 'SEASON',
 'ARENA_NAME',
 'HOME_TEAM_ABBR',
 'HOME_TEAM_NAME',
 'HOME_TEAM_CITY',
 'HOME_TEAM_NICKNAME',
 'HOME_TEAM_STATE',
 'VISITOR_TEAM_ABBR',
 'VISITOR_TEAM_NAME',
 'VISITOR_TEAM_CITY',
 'VISITOR_TEAM_NICKNAME',
 'VISITOR_TEAM_STATE']

In [76]:
reordered_columns = [
    'SEASON',
    'GAME_DATE_EST',
    'GAME_ID',
    'ARENA_NAME',
    'VISITOR_TEAM_ID',
    'VISITOR_TEAM_ABBR',
    'VISITOR_TEAM_NAME',
    'VISITOR_TEAM_CITY',
    'VISITOR_TEAM_NICKNAME',
    'VISITOR_TEAM_STATE',
    'HOME_TEAM_ID',
    'HOME_TEAM_ABBR',
    'HOME_TEAM_NAME',
    'HOME_TEAM_CITY',
    'HOME_TEAM_NICKNAME',
    'HOME_TEAM_STATE',   
]
global_scoreboard_df = global_scoreboard_df.loc[:, reordered_columns]

In [77]:
global_scoreboard_df.sample(5)

Unnamed: 0,SEASON,GAME_DATE_EST,GAME_ID,ARENA_NAME,VISITOR_TEAM_ID,VISITOR_TEAM_ABBR,VISITOR_TEAM_NAME,VISITOR_TEAM_CITY,VISITOR_TEAM_NICKNAME,VISITOR_TEAM_STATE,HOME_TEAM_ID,HOME_TEAM_ABBR,HOME_TEAM_NAME,HOME_TEAM_CITY,HOME_TEAM_NICKNAME,HOME_TEAM_STATE
114,2018,2019-03-08T00:00:00,21800980,United Center,1610612765,DET,Detroit Pistons,Detroit,Pistons,Michigan,1610612741,CHI,Chicago Bulls,Chicago,Bulls,Illinois
651,2018,2019-03-22T00:00:00,21801086,Staples Center,1610612751,BKN,Brooklyn Nets,Brooklyn,Nets,New York,1610612747,LAL,Los Angeles Lakers,Los Angeles,Lakers,California
860,2018,2019-01-15T00:00:00,21800653,State Farm Arena,1610612760,OKC,Oklahoma City Thunder,Oklahoma City,Thunder,Oklahoma,1610612737,ATL,Atlanta Hawks,Atlanta,Hawks,Atlanta
955,2018,2019-03-16T00:00:00,21801039,AT&T Center,1610612757,POR,Portland Trail Blazers,Portland,Trail Blazers,Oregon,1610612759,SAS,San Antonio Spurs,San Antonio,Spurs,Texas
850,2018,2018-12-22T00:00:00,21800485,Vivint Smart Home Arena,1610612760,OKC,Oklahoma City Thunder,Oklahoma City,Thunder,Oklahoma,1610612762,UTA,Utah Jazz,Utah,Jazz,Utah


Dump to CSV

In [78]:
global_scoreboard_df.to_csv('./nba_schedule_2018_19.csv', sep='|', header=True, index=False, encoding='utf-8')

## Get the Coordinates of the NBA Cities

In [79]:
# !pip install geopy

Collecting geopy
[?25l  Downloading https://files.pythonhosted.org/packages/a8/5c/ca78a17b2c6fe1179e9221c9280bb5d8caaf048a631ed72caed40c52de24/geopy-1.18.1-py2.py3-none-any.whl (98kB)
[K    100% |████████████████████████████████| 102kB 1.3MB/s a 0:00:01
[?25hCollecting geographiclib<2,>=1.49 (from geopy)
  Downloading https://files.pythonhosted.org/packages/5b/ac/4f348828091490d77899bc74e92238e2b55c59392f21948f296e94e50e2b/geographiclib-1.49.tar.gz
Building wheels for collected packages: geographiclib
  Running setup.py bdist_wheel for geographiclib ... [?25ldone
[?25h  Stored in directory: /Users/leon/Library/Caches/pip/wheels/99/45/d1/14954797e2a976083182c2e7da9b4e924509e59b6e5c661061
Successfully built geographiclib
Installing collected packages: geographiclib, geopy
Successfully installed geographiclib-1.49 geopy-1.18.1


In [112]:
from geopy.geocoders import Nominatim
geolocator = Nominatim(user_agent='NBA-Schedule')

In [123]:
# Group the team city names in the dataset 
city_list = (global_scoreboard_df.loc[:,['HOME_TEAM_CITY', 'HOME_TEAM_STATE', 'HOME_TEAM_NAME']]
                                 .groupby(['HOME_TEAM_CITY','HOME_TEAM_STATE', 'HOME_TEAM_NAME'])
                                 .count()
                                 .reset_index())

In [124]:
city_list

Unnamed: 0,HOME_TEAM_CITY,HOME_TEAM_STATE,HOME_TEAM_NAME
0,Atlanta,Atlanta,Atlanta Hawks
1,Boston,Massachusetts,Boston Celtics
2,Brooklyn,New York,Brooklyn Nets
3,Charlotte,North Carolina,Charlotte Hornets
4,Chicago,Illinois,Chicago Bulls
5,Cleveland,Ohio,Cleveland Cavaliers
6,Dallas,Texas,Dallas Mavericks
7,Denver,Colorado,Denver Nuggets
8,Detroit,Michigan,Detroit Pistons
9,Golden State,California,Golden State Warriors


In [127]:
import time

# Lists to store latitude and longitude values
lats = []
lons = []

# Iterate through each row of the city DataFrame
for ix, row in city_list.iterrows():
    
    # Create city, state string
    # (manually handle Atlanta, Atlanta and Golden State, California datapoints)
    if row.HOME_TEAM_CITY == 'Atlanta':
        city_comma_state = f"{row.HOME_TEAM_CITY}, Georgia"
    elif row.HOME_TEAM_CITY == 'Golden State':
        city_comma_state = f"Oakland, {row.HOME_TEAM_STATE}"
    else:
        city_comma_state = f"{row.HOME_TEAM_CITY}, {row.HOME_TEAM_STATE}"
    
    # Get location data
    location = geolocator.geocode(city_comma_state)
    print(f"{city_comma_state} - {location.latitude}, {location.longitude}")
    
    # Store latitude and longitude information 
    lats.append(location.latitude)
    lons.append(location.longitude)
    
    # Pause for a few seconds
    time.sleep(3)

Atlanta, Georgia - 33.7490987, -84.3901849
Boston, Massachusetts - 42.3602534, -71.0582912
Brooklyn, New York - 40.6501038, -73.9495823
Charlotte, North Carolina - 35.2270869, -80.8431268
Chicago, Illinois - 41.8755616, -87.6244212
Cleveland, Ohio - 41.5051613, -81.6934446
Dallas, Texas - 32.7762719, -96.7968559
Denver, Colorado - 39.7392364, -104.9848623
Detroit, Michigan - 42.3315509, -83.0466403
Oakland, California - 37.8044557, -122.2713563
Houston, Texas - 29.7589382, -95.3676974
Indiana, Indiana - 40.6214551, -79.1525349
Los Angeles, California - 34.0536834, -118.2427669
Los Angeles, California - 34.0536834, -118.2427669
Memphis, Tennessee - 35.1490215, -90.0516285
Miami, Florida - 25.7742658, -80.1936589
Milwaukee, Wisconsin - 43.0349931, -87.922497
Minnesota, Minnesota - 44.98632495, -93.179462592857
New Orleans, Louisiana - 29.9499323, -90.0701156
New York, New York - 40.7308619, -73.9871558
Oklahoma City, Oklahoma - 35.4729886, -97.5170536
Orlando, Florida - 28.5421097, -81.3

In [128]:
# Add lats and lons to city DataFrame
city_list.loc[:, 'lat'] = lats
city_list.loc[:, 'lon'] = lons

In [129]:
city_list

Unnamed: 0,HOME_TEAM_CITY,HOME_TEAM_STATE,HOME_TEAM_NAME,lat,lon
0,Atlanta,Atlanta,Atlanta Hawks,33.749099,-84.390185
1,Boston,Massachusetts,Boston Celtics,42.360253,-71.058291
2,Brooklyn,New York,Brooklyn Nets,40.650104,-73.949582
3,Charlotte,North Carolina,Charlotte Hornets,35.227087,-80.843127
4,Chicago,Illinois,Chicago Bulls,41.875562,-87.624421
5,Cleveland,Ohio,Cleveland Cavaliers,41.505161,-81.693445
6,Dallas,Texas,Dallas Mavericks,32.776272,-96.796856
7,Denver,Colorado,Denver Nuggets,39.739236,-104.984862
8,Detroit,Michigan,Detroit Pistons,42.331551,-83.04664
9,Golden State,California,Golden State Warriors,37.804456,-122.271356


In [142]:
import plotly.offline as pyo
import plotly.graph_objs as go

pyo.init_notebook_mode(connected=True)

data = [go.Scattergeo(
            lat=city_list['lat'],
            lon=city_list['lon'],
            locationmode="USA-states",
            text=city_list['HOME_TEAM_NAME'],
            mode='markers')
       ]

layout = go.Layout(title='NBA Team Locations',
                   geo={'scope': 'usa'}
                  )

fig = go.Figure(data=data, layout=layout)
pyo.iplot(fig)