In [37]:
#DataCollection: The program scrapes the web for NBA data and performs
#additional column maniplation to preprocess data. The data for each team's
#offense, defense, and strength of schedule are saved in CSV files for
#use by the Project notebook.
#CSE 482 Term Project
#Ian Murray
#Keaton Coffman

#---Static Details---
import pandas as p
from nba_api.stats.endpoints import leaguedashteamstats, teamdashboardbyopponent
from nba_api.stats.static import teams
import json, struct, time, csv
timeout = 0.75 #How long to wait between web scrapes to avoid timeout

In [38]:
#---Read in offensive data for each team and save as csv---

#API call
print("Fetching Offensive Data")
rawOffense = json.loads(leaguedashteamstats.LeagueDashTeamStats(100, "Base").get_json())

#Define headers
headers = ['Team','W','L','FGA','FTA','ORB','TOV', 'PTS']

#Delete unnecessary data and trim
del rawOffense["resource"]
del rawOffense["parameters"]
del rawOffense["resultSets"][0]["headers"]
del rawOffense["resultSets"][0]["name"]
rawOffense = rawOffense["resultSets"][0]["rowSet"]

#Append necessary columns
Selected_Cols = []
for item in rawOffense:
    #Offense table uses LA instead of Los Angeles
    if item[1] == "LA Clippers":
        item[1] = "Los Angeles Clippers"
    new_list = []
    new_list.append(item[1])
    new_list.append(item[3])
    new_list.append(item[4])
    new_list.append(item[8])
    new_list.append(item[14])
    new_list.append(item[16])
    new_list.append(item[20])
    new_list.append(item[26])
    Selected_Cols.append(new_list)
    
#Convert to DataFrame
rawOffense = Selected_Cols
dataOffense = p.DataFrame(rawOffense,columns=headers)

#Add Possessions, points-per-possession, and win-loss Ratio
dataOffense['PossOffense'] = dataOffense['FGA'] - dataOffense['ORB'] + dataOffense['TOV'] + (0.44 * dataOffense['FTA'])
dataOffense['PPPOffense'] = dataOffense['PTS'] / dataOffense['PossOffense']
dataOffense['WLRatio'] = dataOffense['W'] / (dataOffense['W'] + dataOffense['L'])
dataOffense["PPG"] = dataOffense["PPPOffense"] * (dataOffense["PossOffense"] / (dataOffense["W"] + dataOffense["L"]))

#Drop data we no longer need
dataOffense = dataOffense.drop(['FGA', 'FTA', 'ORB', 'TOV', 'PTS'], axis = 1)

#Save to csv
print("Saving Offensive CSV")
dataOffense.to_csv("offense.csv", encoding = 'utf-8', index = False)

print("Done!")

Fetching Offensive Data
Saving Offensive CSV
Done!


In [39]:
#---Read in defensive data for each team---

#---Static data: The database only has NBA teams by ID, not name
string_num = str(1610612737)
Selected_Cols = []
teamDict = {"1610612737":'Atlanta Hawks', "1610612738": 'Boston Celtics', "1610612739": 'Cleveland Cavaliers',
            "1610612740": 'New Orleans Pelicans', "1610612741": 'Chicago Bulls', "1610612742": 'Dallas Mavericks',
            "1610612743": 'Denver Nuggets', "1610612744": 'Golden State Warriors', "1610612745": 'Houston Rockets',
            "1610612746": 'Los Angeles Clippers', "1610612747": 'Los Angeles Lakers', "1610612748": 'Miami Heat',
            "1610612749": 'Milwaukee Bucks', "1610612750": 'Minnesota Timberwolves', "1610612751": 'Brooklyn Nets',
            "1610612752": 'New York Knicks', "1610612753": 'Orlando Magic', "1610612754": 'Indiana Pacers',
            "1610612755": 'Philadelphia 76ers', "1610612756": 'Phoenix Suns', "1610612757": 'Portland Trail Blazers',
            "1610612758": 'Sacramento Kings', "1610612759": 'San Antonio Spurs', "1610612760": 'Oklahoma City Thunder',
            "1610612761": 'Toronto Raptors', "1610612762": 'Utah Jazz', "1610612763": 'Memphis Grizzlies',
            "1610612764": 'Washington Wizards', "1610612765": 'Detroit Pistons', "1610612766": 'Charlotte Hornets'}

#Define headers
headers = ['Team','FGA','ORB','TOV', 'FTA', 'PTS']

#For each team, grab their information
for k in range(0, len(teamDict)):
    #API call
    time.sleep(timeout) #Avoid getting blacklisted!
    print("Fetching " + currTeamName + ", " + str(k + 1) + " of " + str(len(teamDict)))
    Opp = json.loads(teamdashboardbyopponent.TeamDashboardByOpponent(string_num).get_json())

    #Populate data
    currTeamName = teamDict[string_num]
    new_list = []
    new_list.append(currTeamName)
    new_list.append(Opp["resultSets"][0]["rowSet"][0][8])
    new_list.append(Opp["resultSets"][0]["rowSet"][0][16])
    new_list.append(Opp["resultSets"][0]["rowSet"][0][20])
    new_list.append(Opp["resultSets"][0]["rowSet"][0][14])
    new_list.append(Opp["resultSets"][0]["rowSet"][0][26])
    Selected_Cols.append(new_list)
    string_num = int(string_num)
    string_num += 1
    string_num = str(string_num)
    
#Convery to DataFrame
dataDefense = p.DataFrame(Selected_Cols,columns=headers)

#Add allowed possessions and allowed points-per-possession
dataDefense['PossDefense'] = dataDefense['FGA'] - dataDefense['ORB'] + dataDefense['TOV'] + (0.44 * dataDefense['FTA'])
dataDefense['PPPDefense'] = dataDefense['PTS'] / dataDefense['PossDefense']

#Drop data we no longer need
dataDefense = dataDefense.drop(['FGA', 'FTA', 'ORB', 'TOV', 'PTS'], axis = 1)

#Save to CSV
print("Saving Defensive CSV")
dataDefense.to_csv("defense.csv", encoding = 'utf-8', index = False)

print("Done!")

Fetching Charlotte Hornets, 1 of 30
Fetching Atlanta Hawks, 2 of 30
Fetching Boston Celtics, 3 of 30
Fetching Cleveland Cavaliers, 4 of 30
Fetching New Orleans Pelicans, 5 of 30
Fetching Chicago Bulls, 6 of 30
Fetching Dallas Mavericks, 7 of 30
Fetching Denver Nuggets, 8 of 30
Fetching Golden State Warriors, 9 of 30
Fetching Houston Rockets, 10 of 30
Fetching Los Angeles Clippers, 11 of 30
Fetching Los Angeles Lakers, 12 of 30
Fetching Miami Heat, 13 of 30
Fetching Milwaukee Bucks, 14 of 30
Fetching Minnesota Timberwolves, 15 of 30
Fetching Brooklyn Nets, 16 of 30
Fetching New York Knicks, 17 of 30
Fetching Orlando Magic, 18 of 30
Fetching Indiana Pacers, 19 of 30
Fetching Philadelphia 76ers, 20 of 30
Fetching Phoenix Suns, 21 of 30
Fetching Portland Trail Blazers, 22 of 30
Fetching Sacramento Kings, 23 of 30
Fetching San Antonio Spurs, 24 of 30
Fetching Oklahoma City Thunder, 25 of 30
Fetching Toronto Raptors, 26 of 30
Fetching Utah Jazz, 27 of 30
Fetching Memphis Grizzlies, 28 of 30


In [40]:
#---Read in strength of schedule (SOS) data for each team---

#This data is not availabe via API call so it is saved locally
print("Fetching SOS Data")
record = p.read_csv("record.csv", skiprows = 1, header="infer")

#Drop data we don't need
record = record.drop(['Rk', 'W', 'L', 'Age', 'PW', 'PL', 'MOV', 'SRS', 'ORtg', 'TOV%', 'ORB%', 'FT/FGA',
                  'eFG%.1', 'TOV%.1', 'DRB%', 'FT/FGA.1', 'Arena', 'Attend.', 'Attend./G', 'DRtg',
                    'Pace', '3PAr', 'TS%', 'eFG%', 'FTr'], axis = 1)

#Add Strength-of-schedule column
record['SOS'] += abs(record['SOS'].min()) 

#Save to CSV
print("Saving SOS CSV")
record.to_csv("SOS.csv", encoding = 'utf-8', index = False)

print("Done!")

Fetching SOS Data
Saving SOS CSV
Done!
