In [1]:
from bs4 import BeautifulSoup
import requests

from IPython.core.display import display, HTML

import pandas as pd

import re

import numpy as np

import datetime

In [2]:
#Retrieve breakdown of games from october to end of march

urlList = [
    'https://www.basketball-reference.com/leagues/NBA_2018_games-october.html',
    'https://www.basketball-reference.com/leagues/NBA_2018_games-november.html',
    'https://www.basketball-reference.com/leagues/NBA_2018_games-december.html',
    'https://www.basketball-reference.com/leagues/NBA_2018_games-january.html',
    'https://www.basketball-reference.com/leagues/NBA_2018_games-february.html',
    'https://www.basketball-reference.com/leagues/NBA_2018_games-march.html'
]
soupList = []
for url in urlList:
    response = requests.get(url)
    page = response.text
    soup = BeautifulSoup(page, "lxml")
    soupList.append(soup)
[soup1,soup2,soup3,soup4,soup5,soup6] = soupList

In [3]:
headerData = soup1.find(class_ = 'overthrow table_container').find_all('tr')[0].find_all('th')

In [4]:
column_headers = []

#get headers
for header in headerData:
    column_headers.append(header['data-stat'])
column_headers

['date_game',
 'game_start_time',
 'visitor_team_name',
 'visitor_pts',
 'home_team_name',
 'home_pts',
 'box_score_text',
 'overtimes',
 'attendance',
 'game_remarks']

In [5]:
fullDataList = []

for soup in soupList:
    cellData = soup.find(class_ = 'overthrow table_container').find_all('tr')
    for row in range(1,len(cellData)):
        rowList = []
        for cell in cellData[row].find_all(lambda tag: tag.name == 'td' or tag.name == 'th'):
            if cell.contents == []:
                value = ''
            elif cell.contents[0].name == 'a':
                if cell.contents[0].contents[0] == 'Box Score':
                    value = 'https://www.basketball-reference.com'+ cell.contents[0]['href']
                else:
                    value = cell.contents[0].contents[0]
            else:
                value = cell.contents[0]
            rowList.append(value)
        fullDataList.append(rowList)

cleanCellData = [x for x in fullDataList if x != []]

cleanCellData

[['Tue, Oct 17, 2017',
  '8:01p',
  'Boston Celtics',
  '99',
  'Cleveland Cavaliers',
  '102',
  'https://www.basketball-reference.com/boxscores/201710170CLE.html',
  '',
  '20,562',
  ''],
 ['Tue, Oct 17, 2017',
  '10:30p',
  'Houston Rockets',
  '122',
  'Golden State Warriors',
  '121',
  'https://www.basketball-reference.com/boxscores/201710170GSW.html',
  '',
  '19,596',
  ''],
 ['Wed, Oct 18, 2017',
  '7:00p',
  'Charlotte Hornets',
  '90',
  'Detroit Pistons',
  '102',
  'https://www.basketball-reference.com/boxscores/201710180DET.html',
  '',
  '20,491',
  ''],
 ['Wed, Oct 18, 2017',
  '7:00p',
  'Brooklyn Nets',
  '131',
  'Indiana Pacers',
  '140',
  'https://www.basketball-reference.com/boxscores/201710180IND.html',
  '',
  '15,008',
  ''],
 ['Wed, Oct 18, 2017',
  '7:00p',
  'Miami Heat',
  '109',
  'Orlando Magic',
  '116',
  'https://www.basketball-reference.com/boxscores/201710180ORL.html',
  '',
  '18,846',
  ''],
 ['Wed, Oct 18, 2017',
  '7:00p',
  'Philadelphia 76ers

In [6]:
#Combine Header and Data to create intiial DF

df = pd.DataFrame(cleanCellData,columns = column_headers)

(1142, 11)

In [7]:
def date_change(row):
    t = datetime.datetime.strptime(row['date_game'].replace(',',''), "%a %b %d %Y")
    convert_date = t.strftime('%m/%d/%Y')
    return(convert_date)

df['Date'] = pd.to_datetime(df.apply(date_change,axis = 1))

In [8]:
nbaDict = {
'Atlanta Hawks': 'ATL',
'Brooklyn Nets': 'BRK',
'Boston Celtics': 'BOS',
'Charlotte Hornets': 'CHO',
'Chicago Bulls': 'CHI',
'Cleveland Cavaliers': 'CLE',
'Dallas Mavericks': 'DAL',
'Denver Nuggets': 'DEN',
'Detroit Pistons': 'DET',
'Golden State Warriors': 'GSW',
'Houston Rockets': 'HOU',
'Indiana Pacers': 'IND',
'Los Angeles Clippers': 'LAC',
'Los Angeles Lakers': 'LAL',
'Memphis Grizzlies': 'MEM',
'Miami Heat': 'MIA',
'Milwaukee Bucks': 'MIL',
'Minnesota Timberwolves': 'MIN',
'New Orleans Pelicans': 'NOP',
'New York Knicks': 'NYK',
'Oklahoma City Thunder': 'OKC',
'Orlando Magic': 'ORL',
'Philadelphia 76ers': 'PHI',
'Phoenix Suns': 'PHO',
'Portland Trail Blazers': 'POR',
'Sacramento Kings': 'SAC',
'San Antonio Spurs': 'SAS',
'Toronto Raptors': 'TOR',
'Utah Jazz': 'UTA',
'Washington Wizards': 'WAS'}

In [9]:
statColumns = ['vis FG','vis FGA','vis FG %','vis 3P','vis 3PA',
    'vis 3P%','vis FT','vis FTA','vis FT%','vis ORB',
    'vis DRB','vis TRB','vis AST','vis STL','vis BLK',
    'vis TOV','vis PF','home FG','home FGA','home FG %',
    'home 3P','home 3PA','home 3P%','home FT','home FTA',
    'home FT%','home ORB','home DRB','home TRB','home AST',
    'home STL','home BLK','home TOV','home PF']

In [10]:
def statRecorder(row):
    url1 = row['box_score_text']
    response1 = requests.get(url1)
    page1 = response1.text
    soup1 = BeautifulSoup(page1, "lxml")
    team_list = [row['visitor_team_name'],row['home_team_name']]
    print(team_list)
    statsList = []
    for team in team_list:
        nbaTeam = nbaDict[team]
        boxScoreLink = "box-{}-game-basic".format(nbaTeam)
        totalStats = soup1.find('table', id =boxScoreLink).find_all('tr')[-1].find_all('td')
        for cell in totalStats[1:-2]:
            statsList.append(cell.contents[0])
    return(statsList)

In [11]:
#Create new stats list incrementally and then concatenate after
stats_list = []

In [12]:
zeroBatch = df[0:1].apply(statRecorder,axis = 1)
for i in zeroBatch:
    stats_list.insert(0,i)

['Boston Celtics', 'Cleveland Cavaliers']


In [13]:
firstBatch = df[1:101].apply(statRecorder,axis = 1)
for i in firstBatch:
    stats_list.append(i)

['Houston Rockets', 'Golden State Warriors']
['Charlotte Hornets', 'Detroit Pistons']
['Brooklyn Nets', 'Indiana Pacers']
['Miami Heat', 'Orlando Magic']
['Philadelphia 76ers', 'Washington Wizards']
['Milwaukee Bucks', 'Boston Celtics']
['New Orleans Pelicans', 'Memphis Grizzlies']
['Atlanta Hawks', 'Dallas Mavericks']
['Denver Nuggets', 'Utah Jazz']
['Minnesota Timberwolves', 'San Antonio Spurs']
['Portland Trail Blazers', 'Phoenix Suns']
['Houston Rockets', 'Sacramento Kings']
['Chicago Bulls', 'Toronto Raptors']
['New York Knicks', 'Oklahoma City Thunder']
['Los Angeles Clippers', 'Los Angeles Lakers']
['Atlanta Hawks', 'Charlotte Hornets']
['Portland Trail Blazers', 'Indiana Pacers']
['Cleveland Cavaliers', 'Milwaukee Bucks']
['Boston Celtics', 'Philadelphia 76ers']
['Detroit Pistons', 'Washington Wizards']
['Orlando Magic', 'Brooklyn Nets']
['Utah Jazz', 'Minnesota Timberwolves']
['Sacramento Kings', 'Dallas Mavericks']
['Golden State Warriors', 'New Orleans Pelicans']
['Los Angel

In [14]:
secondBatch = df[101:201].apply(statRecorder,axis = 1)
for i in secondBatch:
    stats_list.append(i)

['Phoenix Suns', 'Brooklyn Nets']
['Oklahoma City Thunder', 'Milwaukee Bucks']
['Detroit Pistons', 'Los Angeles Lakers']
['Milwaukee Bucks', 'Charlotte Hornets']
['Indiana Pacers', 'Cleveland Cavaliers']
['Atlanta Hawks', 'Philadelphia 76ers']
['Phoenix Suns', 'Washington Wizards']
['Sacramento Kings', 'Boston Celtics']
['Chicago Bulls', 'Miami Heat']
['Orlando Magic', 'Memphis Grizzlies']
['Minnesota Timberwolves', 'New Orleans Pelicans']
['Houston Rockets', 'New York Knicks']
['Toronto Raptors', 'Denver Nuggets']
['Portland Trail Blazers', 'Utah Jazz']
['Dallas Mavericks', 'Los Angeles Clippers']
['Golden State Warriors', 'San Antonio Spurs']
['Los Angeles Lakers', 'Portland Trail Blazers']
['Milwaukee Bucks', 'Detroit Pistons']
['Chicago Bulls', 'Orlando Magic']
['Indiana Pacers', 'Philadelphia 76ers']
['Cleveland Cavaliers', 'Washington Wizards']
['Houston Rockets', 'Atlanta Hawks']
['Phoenix Suns', 'New York Knicks']
['New Orleans Pelicans', 'Dallas Mavericks']
['Charlotte Hornets

In [15]:
thirdBatch = df[201:301].apply(statRecorder,axis = 1)
for i in thirdBatch:
    stats_list.append(i)

['Toronto Raptors', 'Houston Rockets']
['San Antonio Spurs', 'Dallas Mavericks']
['Sacramento Kings', 'Atlanta Hawks']
['Washington Wizards', 'Miami Heat']
['Utah Jazz', 'New York Knicks']
['Cleveland Cavaliers', 'Charlotte Hornets']
['Indiana Pacers', 'Memphis Grizzlies']
['Detroit Pistons', 'Milwaukee Bucks']
['San Antonio Spurs', 'Minnesota Timberwolves']
['Toronto Raptors', 'New Orleans Pelicans']
['Chicago Bulls', 'Oklahoma City Thunder']
['Orlando Magic', 'Portland Trail Blazers']
['Philadelphia 76ers', 'Los Angeles Lakers']
['Golden State Warriors', 'Boston Celtics']
['Houston Rockets', 'Phoenix Suns']
['Detroit Pistons', 'Indiana Pacers']
['Miami Heat', 'Washington Wizards']
['Utah Jazz', 'Brooklyn Nets']
['Los Angeles Clippers', 'Cleveland Cavaliers']
['New York Knicks', 'Toronto Raptors']
['Charlotte Hornets', 'Chicago Bulls']
['Oklahoma City Thunder', 'San Antonio Spurs']
['Minnesota Timberwolves', 'Dallas Mavericks']
['Portland Trail Blazers', 'Sacramento Kings']
['New Orle

In [16]:
fourthBatch = df[301:401].apply(statRecorder,axis = 1)
for i in fourthBatch:
    stats_list.append(i)

['Denver Nuggets', 'Utah Jazz']
['Phoenix Suns', 'Detroit Pistons']
['Oklahoma City Thunder', 'Orlando Magic']
['Washington Wizards', 'Philadelphia 76ers']
['Miami Heat', 'New York Knicks']
['Charlotte Hornets', 'Toronto Raptors']
['Indiana Pacers', 'Houston Rockets']
['Minnesota Timberwolves', 'New Orleans Pelicans']
['Brooklyn Nets', 'Dallas Mavericks']
['Memphis Grizzlies', 'San Antonio Spurs']
['Golden State Warriors', 'Los Angeles Lakers']
['Cleveland Cavaliers', 'Atlanta Hawks']
['Philadelphia 76ers', 'Boston Celtics']
['Chicago Bulls', 'Denver Nuggets']
['Milwaukee Bucks', 'Portland Trail Blazers']
['Utah Jazz', 'Los Angeles Clippers']
['Golden State Warriors', 'Orlando Magic']
['Detroit Pistons', 'Washington Wizards']
['Indiana Pacers', 'Toronto Raptors']
['Sacramento Kings', 'Chicago Bulls']
['San Antonio Spurs', 'Memphis Grizzlies']
['Charlotte Hornets', 'Miami Heat']
['Minnesota Timberwolves', 'Oklahoma City Thunder']
['New Orleans Pelicans', 'Utah Jazz']
['Phoenix Suns', 'B

In [17]:
fifthBatch = df[401:501].apply(statRecorder,axis = 1)
for i in fifthBatch:
    stats_list.append(i)

['Philadelphia 76ers', 'Minnesota Timberwolves']
['Phoenix Suns', 'Sacramento Kings']
['Oklahoma City Thunder', 'Indiana Pacers']
['Los Angeles Clippers', 'Orlando Magic']
['Memphis Grizzlies', 'Washington Wizards']
['Denver Nuggets', 'Boston Celtics']
['Portland Trail Blazers', 'Miami Heat']
['Utah Jazz', 'Chicago Bulls']
['Milwaukee Bucks', 'New Orleans Pelicans']
['Toronto Raptors', 'Phoenix Suns']
['Charlotte Hornets', 'Houston Rockets']
['Detroit Pistons', 'Atlanta Hawks']
['New York Knicks', 'Brooklyn Nets']
['Los Angeles Lakers', 'Cleveland Cavaliers']
['Sacramento Kings', 'Minnesota Timberwolves']
['Dallas Mavericks', 'Golden State Warriors']
['Miami Heat', 'Charlotte Hornets']
['Detroit Pistons', 'Indiana Pacers']
['Portland Trail Blazers', 'Orlando Magic']
['Oklahoma City Thunder', 'Philadelphia 76ers']
['Los Angeles Clippers', 'Washington Wizards']
['Utah Jazz', 'Boston Celtics']
['Brooklyn Nets', 'Toronto Raptors']
['Atlanta Hawks', 'Memphis Grizzlies']
['Chicago Bulls', 'M

In [18]:
sixthBatch = df[501:548].apply(statRecorder,axis = 1)
for i in sixthBatch:
    stats_list.append(i)

['Brooklyn Nets', 'San Antonio Spurs']
['Utah Jazz', 'Denver Nuggets']
['Memphis Grizzlies', 'Phoenix Suns']
['Sacramento Kings', 'Los Angeles Clippers']
['Boston Celtics', 'Charlotte Hornets']
['Dallas Mavericks', 'Indiana Pacers']
['Washington Wizards', 'Atlanta Hawks']
['New York Knicks', 'Chicago Bulls']
['Denver Nuggets', 'Minnesota Timberwolves']
['Brooklyn Nets', 'New Orleans Pelicans']
['Toronto Raptors', 'Oklahoma City Thunder']
['Cleveland Cavaliers', 'Sacramento Kings']
['Utah Jazz', 'Golden State Warriors']
['Memphis Grizzlies', 'Los Angeles Lakers']
['Detroit Pistons', 'Orlando Magic']
['Houston Rockets', 'Boston Celtics']
['Minnesota Timberwolves', 'Milwaukee Bucks']
['New York Knicks', 'San Antonio Spurs']
['Philadelphia 76ers', 'Portland Trail Blazers']
['Houston Rockets', 'Washington Wizards']
['Atlanta Hawks', 'Toronto Raptors']
['Indiana Pacers', 'Chicago Bulls']
['Brooklyn Nets', 'Miami Heat']
['Dallas Mavericks', 'New Orleans Pelicans']
['Milwaukee Bucks', 'Oklahom

In [19]:
nbaDictLower = {
'Atlanta Hawks': 'atl',
'Brooklyn Nets': 'brk',
'Boston Celtics': 'bos',
'Charlotte Hornets': 'cho',
'Chicago Bulls': 'chi',
'Cleveland Cavaliers': 'cle',
'Dallas Mavericks': 'dal',
'Denver Nuggets': 'den',
'Detroit Pistons': 'det',
'Golden State Warriors': 'gsw',
'Houston Rockets': 'hou',
'Indiana Pacers': 'ind',
'Los Angeles Clippers': 'lac',
'Los Angeles Lakers': 'lal',
'Memphis Grizzlies': 'mem',
'Miami Heat': 'mia',
'Milwaukee Bucks': 'mil',
'Minnesota Timberwolves': 'min',
'New Orleans Pelicans': 'nop',
'New York Knicks': 'nyk',
'Oklahoma City Thunder': 'okc',
'Orlando Magic': 'orl',
'Philadelphia 76ers': 'phi',
'Phoenix Suns': 'pho',
'Portland Trail Blazers': 'por',
'Sacramento Kings': 'sac',
'San Antonio Spurs': 'sas',
'Toronto Raptors': 'tor',
'Utah Jazz': 'uta',
'Washington Wizards': 'was'}

In [20]:
def statRecorder2(row):
    url1 = row['box_score_text']
    response1 = requests.get(url1)
    page1 = response1.text
    soup1 = BeautifulSoup(page1, "lxml")
    team_list = [row['visitor_team_name'],row['home_team_name']]
    statsList = []
    for team in team_list:
        nbaTeam = nbaDictLower[team]
        boxScoreLink = "box_{}_basic".format(nbaTeam)
        totalStats = soup1.find('table', id =boxScoreLink).find_all('tr')[-1].find_all('td')
        for cell in totalStats[1:-2]:
            statsList.append(cell.contents[0])
    return(statsList)

In [21]:
seventhBatch = df[548:701].apply(statRecorder,axis = 1)
for i in seventhBatch:
    stats_list.append(i)

['Portland Trail Blazers', 'Cleveland Cavaliers']
['San Antonio Spurs', 'New York Knicks']
['Atlanta Hawks', 'Phoenix Suns']
['Charlotte Hornets', 'Sacramento Kings']
['Memphis Grizzlies', 'Los Angeles Clippers']
['Houston Rockets', 'Orlando Magic']
['San Antonio Spurs', 'Philadelphia 76ers']
['New York Knicks', 'Washington Wizards']
['Minnesota Timberwolves', 'Brooklyn Nets']
['Detroit Pistons', 'Miami Heat']
['Cleveland Cavaliers', 'Boston Celtics']
['Toronto Raptors', 'Chicago Bulls']
['Indiana Pacers', 'Milwaukee Bucks']
['Golden State Warriors', 'Dallas Mavericks']
['Phoenix Suns', 'Denver Nuggets']
['New Orleans Pelicans', 'Utah Jazz']
['Oklahoma City Thunder', 'Los Angeles Lakers']
['Golden State Warriors', 'Houston Rockets']
['Oklahoma City Thunder', 'Los Angeles Clippers']
['Minnesota Timberwolves', 'Boston Celtics']
['Detroit Pistons', 'Philadelphia 76ers']
['New York Knicks', 'Miami Heat']
['Toronto Raptors', 'Milwaukee Bucks']
['Chicago Bulls', 'Dallas Mavericks']
['Phoenix

In [22]:
eighthBatch = df[701:801].apply(statRecorder,axis = 1)
for i in eighthBatch:
    stats_list.append(i)

['Utah Jazz', 'Detroit Pistons']
['Phoenix Suns', 'Indiana Pacers']
['Chicago Bulls', 'Philadelphia 76ers']
['Toronto Raptors', 'Atlanta Hawks']
['Houston Rockets', 'Dallas Mavericks']
['San Antonio Spurs', 'Memphis Grizzlies']
['Minnesota Timberwolves', 'Portland Trail Blazers']
['Boston Celtics', 'Los Angeles Clippers']
['Sacramento Kings', 'Miami Heat']
['Washington Wizards', 'Oklahoma City Thunder']
['New York Knicks', 'Denver Nuggets']
['Minnesota Timberwolves', 'Golden State Warriors']
['Atlanta Hawks', 'Charlotte Hornets']
['Indiana Pacers', 'Cleveland Cavaliers']
['Utah Jazz', 'Toronto Raptors']
['Los Angeles Lakers', 'Chicago Bulls']
['Los Angeles Clippers', 'Memphis Grizzlies']
['Brooklyn Nets', 'Milwaukee Bucks']
['Houston Rockets', 'New Orleans Pelicans']
['Portland Trail Blazers', 'Dallas Mavericks']
['Philadelphia 76ers', 'San Antonio Spurs']
['New York Knicks', 'Phoenix Suns']
['Oklahoma City Thunder', 'Detroit Pistons']
['Orlando Magic', 'Indiana Pacers']
['Washington W

In [23]:
ninthBatch = df[801:901].apply(statRecorder,axis = 1)
for i in ninthBatch:
    stats_list.append(i)

['Washington Wizards', 'Philadelphia 76ers']
['Oklahoma City Thunder', 'Golden State Warriors']
['Phoenix Suns', 'Los Angeles Lakers']
['Brooklyn Nets', 'Detroit Pistons']
['Houston Rockets', 'Miami Heat']
['Minnesota Timberwolves', 'Cleveland Cavaliers']
['Utah Jazz', 'Memphis Grizzlies']
['San Antonio Spurs', 'Phoenix Suns']
['Atlanta Hawks', 'Orlando Magic']
['New York Knicks', 'Toronto Raptors']
['Boston Celtics', 'Washington Wizards']
['Charlotte Hornets', 'Portland Trail Blazers']
['Dallas Mavericks', 'Golden State Warriors']
['Oklahoma City Thunder', 'Los Angeles Lakers']
['Los Angeles Clippers', 'Detroit Pistons']
['New Orleans Pelicans', 'Philadelphia 76ers']
['Cleveland Cavaliers', 'Atlanta Hawks']
['Indiana Pacers', 'Boston Celtics']
['Denver Nuggets', 'Houston Rockets']
['Milwaukee Bucks', 'Miami Heat']
['Charlotte Hornets', 'Utah Jazz']
['Minnesota Timberwolves', 'Chicago Bulls']
['Portland Trail Blazers', 'Sacramento Kings']
['New Orleans Pelicans', 'Brooklyn Nets']
['Mil

In [24]:
tenthBatch = df[901:1001].apply(statRecorder,axis = 1)
for i in tenthBatch:
    stats_list.append(i)

['Orlando Magic', 'Oklahoma City Thunder']
['Indiana Pacers', 'Dallas Mavericks']
['Houston Rockets', 'Utah Jazz']
['Minnesota Timberwolves', 'Sacramento Kings']
['Chicago Bulls', 'Charlotte Hornets']
['Brooklyn Nets', 'Cleveland Cavaliers']
['Philadelphia 76ers', 'Miami Heat']
['Washington Wizards', 'Milwaukee Bucks']
['Sacramento Kings', 'Portland Trail Blazers']
['Los Angeles Clippers', 'Denver Nuggets']
['Milwaukee Bucks', 'Detroit Pistons']
['Toronto Raptors', 'Orlando Magic']
['Indiana Pacers', 'Atlanta Hawks']
['Charlotte Hornets', 'Boston Celtics']
['Phoenix Suns', 'Memphis Grizzlies']
['Golden State Warriors', 'Washington Wizards']
['Oklahoma City Thunder', 'Dallas Mavericks']
['New Orleans Pelicans', 'San Antonio Spurs']
['Houston Rockets', 'Los Angeles Clippers']
['Los Angeles Lakers', 'Miami Heat']
['Philadelphia 76ers', 'Cleveland Cavaliers']
['Brooklyn Nets', 'Sacramento Kings']
['Minnesota Timberwolves', 'Portland Trail Blazers']
['Detroit Pistons', 'Orlando Magic']
['Ch

In [25]:
eleventhBatch = df[1001:1152].apply(statRecorder,axis = 1)
for i in eleventhBatch:
    stats_list.append(i)

['Sacramento Kings', 'Oklahoma City Thunder']
['Miami Heat', 'Portland Trail Blazers']
['Indiana Pacers', 'Philadelphia 76ers']
['Minnesota Timberwolves', 'Washington Wizards']
['Oklahoma City Thunder', 'Atlanta Hawks']
['Toronto Raptors', 'Brooklyn Nets']
['Dallas Mavericks', 'New York Knicks']
['Los Angeles Clippers', 'Chicago Bulls']
['Charlotte Hornets', 'New Orleans Pelicans']
['Orlando Magic', 'San Antonio Spurs']
['Detroit Pistons', 'Utah Jazz']
['Cleveland Cavaliers', 'Phoenix Suns']
['Denver Nuggets', 'Los Angeles Lakers']
['Milwaukee Bucks', 'Orlando Magic']
['Washington Wizards', 'Boston Celtics']
['Miami Heat', 'Sacramento Kings']
['Los Angeles Lakers', 'Golden State Warriors']
['Toronto Raptors', 'Indiana Pacers']
['Charlotte Hornets', 'Atlanta Hawks']
['Philadelphia 76ers', 'New York Knicks']
['Los Angeles Clippers', 'Houston Rockets']
['Chicago Bulls', 'Memphis Grizzlies']
['New Orleans Pelicans', 'San Antonio Spurs']
['Detroit Pistons', 'Denver Nuggets']
['Phoenix Suns'

In [26]:
#Create new pd with stats list and stats columns
statsdf = pd.DataFrame(stats_list,columns = statColumns)
statsdf.head()

Unnamed: 0,vis FG,vis FGA,vis FG %,vis 3P,vis 3PA,vis 3P%,vis FT,vis FTA,vis FT%,vis ORB,...,home FTA,home FT%,home ORB,home DRB,home TRB,home AST,home STL,home BLK,home TOV,home PF
0,36,88,0.409,8,32,0.25,19,25,0.76,9,...,25,0.84,9,41,50,19,3,4,17,25
1,47,97,0.485,15,41,0.366,13,19,0.684,10,...,21,0.905,6,35,41,34,5,9,17,25
2,29,73,0.397,9,30,0.3,23,29,0.793,3,...,12,0.917,9,38,47,24,14,3,8,21
3,45,94,0.479,12,30,0.4,29,32,0.906,11,...,32,0.781,14,33,47,29,12,9,14,25
4,44,101,0.436,8,30,0.267,13,17,0.765,11,...,29,0.759,11,39,50,22,8,9,14,18


In [85]:
#concatenate original df with stats df
resultdf = pd.concat([df, statsdf], axis=1)
resultdf.head()

Unnamed: 0,date_game,game_start_time,visitor_team_name,visitor_pts,home_team_name,home_pts,box_score_text,overtimes,attendance,game_remarks,Date,vis FG,vis FGA,vis FG %,vis 3P,vis 3PA,vis 3P%,vis FT,vis FTA,vis FT%,vis ORB,vis DRB,vis TRB,vis AST,vis STL,vis BLK,vis TOV,vis PF,home FG,home FGA,home FG %,home 3P,home 3PA,home 3P%,home FT,home FTA,home FT%,home ORB,home DRB,home TRB,home AST,home STL,home BLK,home TOV,home PF
0,"Tue, Oct 17, 2017",8:01p,Boston Celtics,99,Cleveland Cavaliers,102,https://www.basketball-reference.com/boxscores...,,20562,,2017-10-17,36,88,0.409,8,32,0.25,19,25,0.76,9,37,46,24,11,4,10,24,38,83,0.458,5,22,0.227,21,25,0.84,9,41,50,19,3,4,17,25
1,"Tue, Oct 17, 2017",10:30p,Houston Rockets,122,Golden State Warriors,121,https://www.basketball-reference.com/boxscores...,,19596,,2017-10-17,47,97,0.485,15,41,0.366,13,19,0.684,10,33,43,28,9,5,12,16,43,80,0.538,16,30,0.533,19,21,0.905,6,35,41,34,5,9,17,25
2,"Wed, Oct 18, 2017",7:00p,Charlotte Hornets,90,Detroit Pistons,102,https://www.basketball-reference.com/boxscores...,,20491,,2017-10-18,29,73,0.397,9,30,0.3,23,29,0.793,3,44,47,16,4,3,17,15,41,96,0.427,9,26,0.346,11,12,0.917,9,38,47,24,14,3,8,21
3,"Wed, Oct 18, 2017",7:00p,Brooklyn Nets,131,Indiana Pacers,140,https://www.basketball-reference.com/boxscores...,,15008,,2017-10-18,45,94,0.479,12,30,0.4,29,32,0.906,11,32,43,22,7,2,20,25,53,102,0.52,9,34,0.265,25,32,0.781,14,33,47,29,12,9,14,25
4,"Wed, Oct 18, 2017",7:00p,Miami Heat,109,Orlando Magic,116,https://www.basketball-reference.com/boxscores...,,18846,,2017-10-18,44,101,0.436,8,30,0.267,13,17,0.765,11,33,44,27,6,4,13,20,43,90,0.478,8,21,0.381,22,29,0.759,11,39,50,22,8,9,14,18


In [28]:
#Output to csv for saving purposes
export_csv = resultdf.to_csv (r'C:\Users\jeromerufin\Desktop\Metis\stats_data.csv') #Don't forget to add '.csv' at the end of the path

In [29]:
resultdf.head()

Unnamed: 0,date_game,game_start_time,visitor_team_name,visitor_pts,home_team_name,home_pts,box_score_text,overtimes,attendance,game_remarks,...,home FTA,home FT%,home ORB,home DRB,home TRB,home AST,home STL,home BLK,home TOV,home PF
0,"Tue, Oct 17, 2017",8:01p,Boston Celtics,99,Cleveland Cavaliers,102,https://www.basketball-reference.com/boxscores...,,20562,,...,25,0.84,9,41,50,19,3,4,17,25
1,"Tue, Oct 17, 2017",10:30p,Houston Rockets,122,Golden State Warriors,121,https://www.basketball-reference.com/boxscores...,,19596,,...,21,0.905,6,35,41,34,5,9,17,25
2,"Wed, Oct 18, 2017",7:00p,Charlotte Hornets,90,Detroit Pistons,102,https://www.basketball-reference.com/boxscores...,,20491,,...,12,0.917,9,38,47,24,14,3,8,21
3,"Wed, Oct 18, 2017",7:00p,Brooklyn Nets,131,Indiana Pacers,140,https://www.basketball-reference.com/boxscores...,,15008,,...,32,0.781,14,33,47,29,12,9,14,25
4,"Wed, Oct 18, 2017",7:00p,Miami Heat,109,Orlando Magic,116,https://www.basketball-reference.com/boxscores...,,18846,,...,29,0.759,11,39,50,22,8,9,14,18


In [30]:
def win_percent(row):
    testurl = row['box_score_text']
    testresponse = requests.get(testurl)
    testpage = testresponse.text
    testsoup = BeautifulSoup(testpage,"lxml")
    wins_team = int(testsoup.findAll('div', text = re.compile("-"))[0].contents[0][0:].split('-')[0])
    losses_team = int(testsoup.findAll('div', text = re.compile("-"))[0].contents[0][0:].split('-')[1])
    win_pct_team = wins_team/(wins_team+losses_team)
    wins_opp = int(testsoup.findAll('div', text = re.compile("-"))[1].contents[0][0:].split('-')[0])
    losses_opp = int(testsoup.findAll('div', text = re.compile("-"))[1].contents[0][0:].split('-')[1])
    win_pct_opp = wins_opp/(wins_opp+losses_opp)
    win_list = [win_pct_team, win_pct_opp]
    print(win_list)
    return(win_list)



In [31]:
win_first_batch = resultdf[0:101]
win_second_batch = resultdf[101:201]
win_third_batch = resultdf[201:301]
win_fourth_batch = resultdf[301:401]
win_fifth_batch = resultdf[401:501]
win_sixth_batch = resultdf[501:601]
win_seventh_batch = resultdf[601:701]
win_eighth_batch = resultdf[701:801]
win_ninth_batch = resultdf[801:901]
win_tenth_batch = resultdf[901:1001]
win_eleventh_batch = resultdf[1001:1152]

In [32]:
win_final_list = []
for i in win_first_batch.apply(win_percent,axis=1):
    win_final_list.append(i)

[0.0, 1.0]
[1.0, 0.0]
[0.0, 1.0]
[0.0, 1.0]
[0.0, 1.0]
[0.0, 1.0]
[1.0, 0.0]
[0.0, 1.0]
[1.0, 0.0]
[0.0, 1.0]
[0.0, 1.0]
[1.0, 0.0]
[1.0, 0.0]
[0.0, 1.0]
[0.0, 1.0]
[1.0, 0.0]
[0.5, 0.5]
[1.0, 0.5]
[1.0, 0.5]
[0.3333333333333333, 0.0]
[0.5, 1.0]
[0.5, 0.5]
[0.5, 0.5]
[0.5, 0.0]
[0.5, 0.0]
[0.5, 0.0]
[0.0, 1.0]
[1.0, 0.0]
[0.6666666666666666, 0.6666666666666666]
[0.0, 1.0]
[0.3333333333333333, 1.0]
[0.3333333333333333, 0.5]
[0.6666666666666666, 0.0]
[0.6666666666666666, 0.6666666666666666]
[0.3333333333333333, 0.5]
[0.5, 0.6666666666666666]
[0.0, 1.0]
[0.3333333333333333, 0.6666666666666666]
[0.6666666666666666, 0.3333333333333333]
[0.3333333333333333, 0.3333333333333333]
[0.25, 0.5]
[0.25, 0.6666666666666666]
[1.0, 0.75]
[0.3333333333333333, 0.75]
[0.5, 0.0]
[0.6666666666666666, 1.0]
[1.0, 0.3333333333333333]
[0.25, 0.25]
[0.0, 0.75]
[0.5, 0.75]
[0.0, 0.5]
[0.5, 0.5]
[0.25, 0.75]
[0.5, 1.0]
[0.25, 0.5]
[0.4, 0.6]
[0.8, 0.2]
[0.6, 0.6]
[1.0, 0.5]
[0.4, 0.5]
[0.75, 0.2]
[0.4, 0.4]
[0.5, 

In [33]:
for i in win_second_batch.apply(win_percent,axis=1):
    win_final_list.append(i)

[0.42857142857142855, 0.375]
[0.5714285714285714, 0.5714285714285714]
[0.625, 0.42857142857142855]
[0.5, 0.625]
[0.625, 0.375]
[0.125, 0.5]
[0.5, 0.5714285714285714]
[0.125, 0.75]
[0.16666666666666666, 0.42857142857142855]
[0.75, 0.625]
[0.625, 0.375]
[0.6666666666666666, 0.42857142857142855]
[0.5714285714285714, 0.5]
[0.5, 0.625]
[0.1111111111111111, 0.7142857142857143]
[0.6666666666666666, 0.5]
[0.375, 0.5555555555555556]
[0.4444444444444444, 0.6666666666666666]
[0.2857142857142857, 0.6666666666666666]
[0.5555555555555556, 0.5555555555555556]
[0.4444444444444444, 0.5]
[0.7, 0.1111111111111111]
[0.4444444444444444, 0.5]
[0.4444444444444444, 0.1]
[0.5555555555555556, 0.5555555555555556]
[0.375, 0.5555555555555556]
[0.625, 0.5555555555555556]
[0.7777777777777778, 0.5]
[0.3333333333333333, 0.4444444444444444]
[0.6666666666666666, 0.625]
[0.1111111111111111, 0.7]
[0.5, 0.25]
[0.09090909090909091, 0.6666666666666666]
[0.7, 0.5]
[0.2, 0.4]
[0.4444444444444444, 0.5555555555555556]
[0.8, 0.6]

In [34]:
for i in win_third_batch.apply(win_percent,axis=1):
    win_final_list.append(i)

[0.6153846153846154, 0.7333333333333333]
[0.6428571428571429, 0.13333333333333333]
[0.21428571428571427, 0.2]
[0.6428571428571429, 0.42857142857142855]
[0.4, 0.5714285714285714]
[0.5333333333333333, 0.38461538461538464]
[0.4666666666666667, 0.5]
[0.7142857142857143, 0.5714285714285714]
[0.6, 0.6428571428571429]
[0.6428571428571429, 0.5333333333333333]
[0.16666666666666666, 0.5]
[0.5333333333333333, 0.5714285714285714]
[0.5714285714285714, 0.4]
[0.7333333333333333, 0.875]
[0.75, 0.3125]
[0.6666666666666666, 0.5]
[0.4666666666666667, 0.6]
[0.375, 0.4]
[0.35714285714285715, 0.5625]
[0.5333333333333333, 0.6666666666666666]
[0.35714285714285715, 0.23076923076923078]
[0.4666666666666667, 0.625]
[0.6666666666666666, 0.125]
[0.5333333333333333, 0.26666666666666666]
[0.5, 0.6]
[0.35294117647058826, 0.375]
[0.3333333333333333, 0.4]
[0.4117647058823529, 0.5]
[0.8823529411764706, 0.1875]
[0.75, 0.5333333333333333]
[0.7647058823529411, 0.4666666666666667]
[0.5333333333333333, 0.17647058823529413]
[

In [35]:
for i in win_fourth_batch.apply(win_percent,axis=1):
    win_final_list.append(i)

[0.55, 0.47619047619047616]
[0.34782608695652173, 0.7]
[0.4, 0.4090909090909091]
[0.5238095238095238, 0.6]
[0.47619047619047616, 0.5238095238095238]
[0.4, 0.65]
[0.5454545454545454, 0.8095238095238095]
[0.5909090909090909, 0.5238095238095238]
[0.38095238095238093, 0.22727272727272727]
[0.35, 0.6666666666666666]
[0.7272727272727273, 0.38095238095238093]
[0.6818181818181818, 0.19047619047619047]
[0.5714285714285714, 0.8260869565217391]
[0.15, 0.5714285714285714]
[0.55, 0.5909090909090909]
[0.5, 0.4]
[0.7391304347826086, 0.391304347826087]
[0.6666666666666666, 0.5454545454545454]
[0.5217391304347826, 0.6666666666666666]
[0.3181818181818182, 0.14285714285714285]
[0.6818181818181818, 0.3333333333333333]
[0.38095238095238093, 0.5]
[0.5652173913043478, 0.42857142857142855]
[0.5, 0.5217391304347826]
[0.3333333333333333, 0.8333333333333334]
[0.38095238095238093, 0.2608695652173913]
[0.22727272727272727, 0.36363636363636365]
[0.3181818181818182, 0.6956521739130435]
[0.6363636363636364, 0.5909090

In [36]:
for i in win_fifth_batch.apply(win_percent,axis=1):
    win_final_list.append(i)

[0.5185185185185185, 0.5714285714285714]
[0.3103448275862069, 0.3333333333333333]
[0.48148148148148145, 0.5714285714285714]
[0.4230769230769231, 0.3793103448275862]
[0.2857142857142857, 0.5357142857142857]
[0.5357142857142857, 0.8]
[0.5185185185185185, 0.48148148148148145]
[0.4642857142857143, 0.25925925925925924]
[0.5769230769230769, 0.5172413793103449]
[0.6923076923076923, 0.3]
[0.37037037037037035, 0.8461538461538461]
[0.5357142857142857, 0.21428571428571427]
[0.5357142857142857, 0.4074074074074074]
[0.37037037037037035, 0.7241379310344828]
[0.32142857142857145, 0.5862068965517241]
[0.27586206896551724, 0.7931034482758621]
[0.5, 0.35714285714285715]
[0.5517241379310345, 0.5517241379310345]
[0.5357142857142857, 0.36666666666666664]
[0.5, 0.5]
[0.4074074074074074, 0.5517241379310345]
[0.4827586206896552, 0.7741935483870968]
[0.39285714285714285, 0.7037037037037037]
[0.20689655172413793, 0.3103448275862069]
[0.2857142857142857, 0.5555555555555556]
[0.5, 0.5517241379310345]
[0.655172413

In [37]:
for i in win_sixth_batch.apply(win_percent,axis=1):
    win_final_list.append(i)

[0.36363636363636365, 0.6857142857142857]
[0.42857142857142855, 0.5588235294117647]
[0.29411764705882354, 0.3611111111111111]
[0.3333333333333333, 0.42424242424242425]
[0.7368421052631579, 0.35294117647058826]
[0.3055555555555556, 0.5428571428571428]
[0.5428571428571428, 0.2647058823529412]
[0.5, 0.35294117647058826]
[0.5428571428571428, 0.6285714285714286]
[0.35294117647058826, 0.5294117647058824]
[0.696969696969697, 0.5714285714285714]
[0.6857142857142857, 0.35294117647058826]
[0.4166666666666667, 0.8]
[0.3142857142857143, 0.3333333333333333]
[0.5588235294117647, 0.3333333333333333]
[0.7575757575757576, 0.7435897435897436]
[0.6111111111111112, 0.5454545454545454]
[0.4857142857142857, 0.6944444444444444]
[0.4411764705882353, 0.5294117647058824]
[0.7352941176470589, 0.5555555555555556]
[0.2571428571428571, 0.7058823529411765]
[0.5277777777777778, 0.37142857142857144]
[0.37142857142857144, 0.5142857142857142]
[0.32432432432432434, 0.5142857142857142]
[0.5588235294117647, 0.5555555555555

In [38]:
for i in win_seventh_batch.apply(win_percent,axis=1):
    win_final_list.append(i)

[0.325, 0.325]
[0.3488372093023256, 0.38461538461538464]
[0.5853658536585366, 0.5121951219512195]
[0.4146341463414634, 0.5609756097560976]
[0.55, 0.36585365853658536]
[0.35714285714285715, 0.4634146341463415]
[0.5365853658536586, 0.725]
[0.5, 0.325]
[0.2857142857142857, 0.55]
[0.5238095238095238, 0.627906976744186]
[0.2682926829268293, 0.5121951219512195]
[0.475, 0.7857142857142857]
[0.7727272727272727, 0.48717948717948717]
[0.6341463414634146, 0.725]
[0.4878048780487805, 0.3170731707317073]
[0.6511627906976745, 0.34146341463414637]
[0.40476190476190477, 0.4]
[0.6190476190476191, 0.5238095238095238]
[0.27906976744186046, 0.5714285714285714]
[0.38095238095238093, 0.2619047619047619]
[0.7906976744186046, 0.5365853658536586]
[0.4523809523809524, 0.6363636363636364]
[0.5238095238095238, 0.5121951219512195]
[0.3170731707317073, 0.5238095238095238]
[0.7317073170731707, 0.37209302325581395]
[0.35714285714285715, 0.3409090909090909]
[0.30952380952380953, 0.5]
[0.5348837209302325, 0.39024390243

In [39]:
for i in win_eighth_batch.apply(win_percent,axis=1):
    win_final_list.append(i)

[0.4166666666666667, 0.4782608695652174]
[0.3541666666666667, 0.5416666666666666]
[0.375, 0.5227272727272727]
[0.6956521739130435, 0.2978723404255319]
[0.7391304347826086, 0.3333333333333333]
[0.64, 0.3617021276595745]
[0.62, 0.5416666666666666]
[0.7142857142857143, 0.48936170212765956]
[0.3125, 0.5625]
[0.5416666666666666, 0.5833333333333334]
[0.42857142857142855, 0.5208333333333334]
[0.6078431372549019, 0.7959183673469388]
[0.2916666666666667, 0.425531914893617]
[0.5306122448979592, 0.5957446808510638]
[0.42857142857142855, 0.6808510638297872]
[0.3958333333333333, 0.3673469387755102]
[0.5, 0.3541666666666667]
[0.3673469387755102, 0.5319148936170213]
[0.723404255319149, 0.5625]
[0.5510204081632653, 0.32653061224489793]
[0.5333333333333333, 0.6274509803921569]
[0.44, 0.3469387755102041]
[0.5918367346938775, 0.46808510638297873]
[0.2916666666666667, 0.54]
[0.5510204081632653, 0.2857142857142857]
[0.4166666666666667, 0.5714285714285714]
[0.7, 0.8]
[0.32, 0.5306122448979592]
[0.36, 0.6153

In [40]:
for i in win_ninth_batch.apply(win_percent,axis=1):
    win_final_list.append(i)

[0.5740740740740741, 0.5098039215686274]
[0.5636363636363636, 0.7592592592592593]
[0.32727272727272727, 0.41509433962264153]
[0.3392857142857143, 0.5094339622641509]
[0.7547169811320755, 0.5272727272727272]
[0.5964912280701754, 0.5849056603773585]
[0.48148148148148145, 0.3333333333333333]
[0.625, 0.32142857142857145]
[0.3090909090909091, 0.3333333333333333]
[0.4107142857142857, 0.7037037037037037]
[0.7142857142857143, 0.5636363636363636]
[0.42592592592592593, 0.5454545454545454]
[0.3090909090909091, 0.7636363636363637]
[0.5535714285714286, 0.42592592592592593]
[0.5283018867924528, 0.5]
[0.5185185185185185, 0.5192307692307693]
[0.5925925925925926, 0.30357142857142855]
[0.5535714285714286, 0.7017543859649122]
[0.5272727272727272, 0.7592592592592593]
[0.5555555555555556, 0.5357142857142857]
[0.41818181818181815, 0.4909090909090909]
[0.5862068965517241, 0.35185185185185186]
[0.5535714285714286, 0.3148148148148148]
[0.5272727272727272, 0.3333333333333333]
[0.5636363636363636, 0.327272727272

In [41]:
for i in win_tenth_batch.apply(win_percent,axis=1):
    win_final_list.append(i)

[0.3, 0.5645161290322581]
[0.5666666666666667, 0.3114754098360656]
[0.7833333333333333, 0.5081967213114754]
[0.59375, 0.3]
[0.32786885245901637, 0.45901639344262296]
[0.3225806451612903, 0.6]
[0.5423728813559322, 0.5245901639344263]
[0.5901639344262295, 0.55]
[0.29508196721311475, 0.5737704918032787]
[0.5423728813559322, 0.5409836065573771]
[0.5409836065573771, 0.47540983606557374]
[0.7166666666666667, 0.29508196721311475]
[0.5573770491803278, 0.3064516129032258]
[0.45161290322580644, 0.6984126984126984]
[0.30158730158730157, 0.3]
[0.7741935483870968, 0.5806451612903226]
[0.5714285714285714, 0.3064516129032258]
[0.5737704918032787, 0.5806451612903226]
[0.7868852459016393, 0.5333333333333333]
[0.4426229508196721, 0.5161290322580645]
[0.55, 0.5901639344262295]
[0.31746031746031744, 0.3064516129032258]
[0.5846153846153846, 0.5806451612903226]
[0.46774193548387094, 0.3064516129032258]
[0.4444444444444444, 0.5573770491803278]
[0.7777777777777778, 0.30158730158730157]
[0.30158730158730157, 0

In [42]:
for i in win_eleventh_batch.apply(win_percent,axis=1):
    win_final_list.append(i)

[0.3088235294117647, 0.5797101449275363]
[0.5294117647058824, 0.6119402985074627]
[0.5882352941176471, 0.5454545454545454]
[0.5797101449275363, 0.5588235294117647]
[0.5857142857142857, 0.29411764705882354]
[0.746268656716418, 0.3088235294117647]
[0.3235294117647059, 0.35294117647058826]
[0.5606060606060606, 0.34328358208955223]
[0.4264705882352941, 0.582089552238806]
[0.29411764705882354, 0.5588235294117647]
[0.44776119402985076, 0.5588235294117647]
[0.582089552238806, 0.2753623188405797]
[0.5441176470588235, 0.4626865671641791]
[0.5294117647058824, 0.30434782608695654]
[0.5652173913043478, 0.6764705882352942]
[0.5217391304347826, 0.3188405797101449]
[0.45588235294117646, 0.7647058823529411]
[0.75, 0.5797101449275363]
[0.43478260869565216, 0.2898550724637681]
[0.5522388059701493, 0.34782608695652173]
[0.5522388059701493, 0.7941176470588235]
[0.35294117647058826, 0.2647058823529412]
[0.5735294117647058, 0.5652173913043478]
[0.4411764705882353, 0.5507246376811594]
[0.2714285714285714, 0.

In [86]:
win_columns = ['Team_Win_Pct','Opp_Win_Pct']

win_pct_df = pd.DataFrame(win_final_list,columns = win_columns)

In [87]:
resultdf = pd.concat([resultdf, win_pct_df], axis=1)

In [88]:
resultdf

Unnamed: 0,date_game,game_start_time,visitor_team_name,visitor_pts,home_team_name,home_pts,box_score_text,overtimes,attendance,game_remarks,Date,vis FG,vis FGA,vis FG %,vis 3P,vis 3PA,vis 3P%,vis FT,vis FTA,vis FT%,vis ORB,vis DRB,vis TRB,vis AST,vis STL,vis BLK,vis TOV,vis PF,home FG,home FGA,home FG %,home 3P,home 3PA,home 3P%,home FT,home FTA,home FT%,home ORB,home DRB,home TRB,home AST,home STL,home BLK,home TOV,home PF,Team_Win_Pct,Opp_Win_Pct
0,"Tue, Oct 17, 2017",8:01p,Boston Celtics,99,Cleveland Cavaliers,102,https://www.basketball-reference.com/boxscores...,,20562,,2017-10-17,36,88,.409,8,32,.250,19,25,.760,9,37,46,24,11,4,10,24,38,83,.458,5,22,.227,21,25,.840,9,41,50,19,3,4,17,25,0.000000,1.000000
1,"Tue, Oct 17, 2017",10:30p,Houston Rockets,122,Golden State Warriors,121,https://www.basketball-reference.com/boxscores...,,19596,,2017-10-17,47,97,.485,15,41,.366,13,19,.684,10,33,43,28,9,5,12,16,43,80,.538,16,30,.533,19,21,.905,6,35,41,34,5,9,17,25,1.000000,0.000000
2,"Wed, Oct 18, 2017",7:00p,Charlotte Hornets,90,Detroit Pistons,102,https://www.basketball-reference.com/boxscores...,,20491,,2017-10-18,29,73,.397,9,30,.300,23,29,.793,3,44,47,16,4,3,17,15,41,96,.427,9,26,.346,11,12,.917,9,38,47,24,14,3,8,21,0.000000,1.000000
3,"Wed, Oct 18, 2017",7:00p,Brooklyn Nets,131,Indiana Pacers,140,https://www.basketball-reference.com/boxscores...,,15008,,2017-10-18,45,94,.479,12,30,.400,29,32,.906,11,32,43,22,7,2,20,25,53,102,.520,9,34,.265,25,32,.781,14,33,47,29,12,9,14,25,0.000000,1.000000
4,"Wed, Oct 18, 2017",7:00p,Miami Heat,109,Orlando Magic,116,https://www.basketball-reference.com/boxscores...,,18846,,2017-10-18,44,101,.436,8,30,.267,13,17,.765,11,33,44,27,6,4,13,20,43,90,.478,8,21,.381,22,29,.759,11,39,50,22,8,9,14,18,0.000000,1.000000
5,"Wed, Oct 18, 2017",7:00p,Philadelphia 76ers,115,Washington Wizards,120,https://www.basketball-reference.com/boxscores...,,20356,,2017-10-18,43,93,.462,15,35,.429,14,19,.737,11,37,48,25,6,6,17,28,42,97,.433,6,22,.273,30,38,.789,16,36,52,21,8,10,9,20,0.000000,1.000000
6,"Wed, Oct 18, 2017",7:30p,Milwaukee Bucks,108,Boston Celtics,100,https://www.basketball-reference.com/boxscores...,,18624,,2017-10-18,38,76,.500,7,21,.333,25,30,.833,6,39,45,19,6,4,15,22,39,91,.429,11,28,.393,11,21,.524,11,32,43,23,12,2,12,27,1.000000,0.000000
7,"Wed, Oct 18, 2017",8:00p,New Orleans Pelicans,91,Memphis Grizzlies,103,https://www.basketball-reference.com/boxscores...,,17794,,2017-10-18,30,79,.380,7,25,.280,24,29,.828,16,35,51,15,4,12,17,31,39,92,.424,9,29,.310,16,28,.571,15,30,45,19,9,8,11,29,0.000000,1.000000
8,"Wed, Oct 18, 2017",8:30p,Atlanta Hawks,117,Dallas Mavericks,111,https://www.basketball-reference.com/boxscores...,,19709,,2017-10-18,48,94,.511,9,18,.500,12,15,.800,14,36,50,20,11,6,13,18,38,86,.442,17,45,.378,18,21,.857,9,32,41,27,5,4,15,19,1.000000,0.000000
9,"Wed, Oct 18, 2017",9:00p,Denver Nuggets,96,Utah Jazz,106,https://www.basketball-reference.com/boxscores...,,17588,,2017-10-18,36,77,.468,13,27,.481,11,12,.917,7,31,38,21,4,3,20,18,41,81,.506,9,24,.375,15,16,.938,6,30,36,28,9,5,15,18,0.000000,1.000000


In [89]:
#visitor stats
visitor_df = resultdf.iloc[:,np.r_[0:28,45]]

#home stats
home_df = resultdf.iloc[:,np.r_[0:2,4:6,2:4,6:11,28:45,46]]


In [90]:
visitor_df.head()

Unnamed: 0,date_game,game_start_time,visitor_team_name,visitor_pts,home_team_name,home_pts,box_score_text,overtimes,attendance,game_remarks,Date,vis FG,vis FGA,vis FG %,vis 3P,vis 3PA,vis 3P%,vis FT,vis FTA,vis FT%,vis ORB,vis DRB,vis TRB,vis AST,vis STL,vis BLK,vis TOV,vis PF,Team_Win_Pct
0,"Tue, Oct 17, 2017",8:01p,Boston Celtics,99,Cleveland Cavaliers,102,https://www.basketball-reference.com/boxscores...,,20562,,2017-10-17,36,88,0.409,8,32,0.25,19,25,0.76,9,37,46,24,11,4,10,24,0.0
1,"Tue, Oct 17, 2017",10:30p,Houston Rockets,122,Golden State Warriors,121,https://www.basketball-reference.com/boxscores...,,19596,,2017-10-17,47,97,0.485,15,41,0.366,13,19,0.684,10,33,43,28,9,5,12,16,1.0
2,"Wed, Oct 18, 2017",7:00p,Charlotte Hornets,90,Detroit Pistons,102,https://www.basketball-reference.com/boxscores...,,20491,,2017-10-18,29,73,0.397,9,30,0.3,23,29,0.793,3,44,47,16,4,3,17,15,0.0
3,"Wed, Oct 18, 2017",7:00p,Brooklyn Nets,131,Indiana Pacers,140,https://www.basketball-reference.com/boxscores...,,15008,,2017-10-18,45,94,0.479,12,30,0.4,29,32,0.906,11,32,43,22,7,2,20,25,0.0
4,"Wed, Oct 18, 2017",7:00p,Miami Heat,109,Orlando Magic,116,https://www.basketball-reference.com/boxscores...,,18846,,2017-10-18,44,101,0.436,8,30,0.267,13,17,0.765,11,33,44,27,6,4,13,20,0.0


In [94]:
home_df.head()

Unnamed: 0,date_game,game_start_time,home_team_name,home_pts,visitor_team_name,visitor_pts,box_score_text,overtimes,attendance,game_remarks,Date,home FG,home FGA,home FG %,home 3P,home 3PA,home 3P%,home FT,home FTA,home FT%,home ORB,home DRB,home TRB,home AST,home STL,home BLK,home TOV,home PF,Opp_Win_Pct,Home Team
0,"Tue, Oct 17, 2017",8:01p,Cleveland Cavaliers,102,Boston Celtics,99,https://www.basketball-reference.com/boxscores...,,20562,,2017-10-17,38,83,0.458,5,22,0.227,21,25,0.84,9,41,50,19,3,4,17,25,1.0,1
1,"Tue, Oct 17, 2017",10:30p,Golden State Warriors,121,Houston Rockets,122,https://www.basketball-reference.com/boxscores...,,19596,,2017-10-17,43,80,0.538,16,30,0.533,19,21,0.905,6,35,41,34,5,9,17,25,0.0,1
2,"Wed, Oct 18, 2017",7:00p,Detroit Pistons,102,Charlotte Hornets,90,https://www.basketball-reference.com/boxscores...,,20491,,2017-10-18,41,96,0.427,9,26,0.346,11,12,0.917,9,38,47,24,14,3,8,21,1.0,1
3,"Wed, Oct 18, 2017",7:00p,Indiana Pacers,140,Brooklyn Nets,131,https://www.basketball-reference.com/boxscores...,,15008,,2017-10-18,53,102,0.52,9,34,0.265,25,32,0.781,14,33,47,29,12,9,14,25,1.0,1
4,"Wed, Oct 18, 2017",7:00p,Orlando Magic,116,Miami Heat,109,https://www.basketball-reference.com/boxscores...,,18846,,2017-10-18,43,90,0.478,8,21,0.381,22,29,0.759,11,39,50,22,8,9,14,18,1.0,1


In [92]:
#Create bool indicator if the team is home or away
list_of_zeros = [0]*visitor_df.shape[0]
list_of_ones = [1]*home_df.shape[0]

away_bool_df = pd.DataFrame(list_of_zeros,columns = ['Home Team'])
home_bool_df = pd.DataFrame(list_of_ones,columns = ['Home Team'])

In [93]:
#Add new column for binary home or away
visitor_df = pd.concat([visitor_df, away_bool_df], axis=1)
home_df = pd.concat([home_df, home_bool_df], axis=1)

In [95]:
visitor_df.columns = ['date_game', 'game_start_time', 'team_name', 'team_pts',
       'opposing_team', 'opposing_team_pts', 'box_score_text', 'overtimes',
       'attendance', 'game_remarks', 'Date', 'FG', 'FGA', 'FG %',
       '3P', '3PA', '3P%', 'FT', 'FTA', 'FT%',
       'ORB', 'DRB', 'TRB', 'AST', 'STL', 'BLK',
       'TOV', 'PF','Team_win_pct','Home Team']


home_df.columns = ['date_game', 'game_start_time', 'team_name', 'team_pts',
       'opposing_team', 'opposing_team_pts', 'box_score_text', 'overtimes',
       'attendance', 'game_remarks', 'Date', 'FG', 'FGA', 'FG %',
       '3P', '3PA', '3P%', 'FT', 'FTA', 'FT%',
       'ORB', 'DRB', 'TRB', 'AST', 'STL', 'BLK',
       'TOV', 'PF','Team_win_pct','Home Team']

In [96]:
#Combine dfs and clean-up unnecessary columns
frames = [visitor_df, home_df]
agg_data = pd.concat(frames,ignore_index=True)
agg_data['date_game'] = agg_data['Date']
del agg_data['Date']

In [97]:
ordered_agg_data = agg_data.sort_values(by = ['team_name','date_game']).reset_index()

In [99]:
rolling_stat_data = agg_data.sort_values(by = ['team_name','date_game'])

testdata_grouped_rolling = rolling_stat_data.groupby('team_name')[['team_pts','opposing_team_pts','FG', 'FGA', 'FG %',
       '3P', '3PA', '3P%', 'FT', 'FTA', 'FT%',
       'ORB', 'DRB', 'TRB', 'AST', 'STL', 'BLK',
       'TOV', 'PF']].rolling(window=30, min_periods=1).mean().reset_index()

In [100]:
del testdata_grouped_rolling['level_1']
del testdata_grouped_rolling['team_name']

In [101]:
testdata_grouped_rolling.columns = ['Avg1 Pts','Avg1 Opp Pts','Avg1 FG','Avg1 FGA','Avg1 FG %',
       'Avg1 3P','Avg1 3PA','Avg1 3P%','Avg1 FT','Avg1 FTA','Avg1 FT%',
       'Avg1 ORB','Avg1 DRB','Avg1 TRB','Avg1 AST','Avg1 STL','Avg1 BLK',
       'Avg1 TOV','Avg1 PF']

In [102]:
full_stats_data = pd.concat([ordered_agg_data ,testdata_grouped_rolling], axis=1)
full_stats_data.head(5)

Unnamed: 0,index,date_game,game_start_time,team_name,team_pts,opposing_team,opposing_team_pts,box_score_text,overtimes,attendance,game_remarks,FG,FGA,FG %,3P,3PA,3P%,FT,FTA,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,Team_win_pct,Home Team,Avg1 Pts,Avg1 Opp Pts,Avg1 FG,Avg1 FGA,Avg1 FG %,Avg1 3P,Avg1 3PA,Avg1 3P%,Avg1 FT,Avg1 FTA,Avg1 FT%,Avg1 ORB,Avg1 DRB,Avg1 TRB,Avg1 AST,Avg1 STL,Avg1 BLK,Avg1 TOV,Avg1 PF
0,8,2017-10-18,8:30p,Atlanta Hawks,117,Dallas Mavericks,111,https://www.basketball-reference.com/boxscores...,,19709,,48,94,0.511,9,18,0.5,12,15,0.8,14,36,50,20,11,6,13,18,1.0,0,117.0,111.0,48.0,94.0,0.511,9.0,18.0,0.5,12.0,15.0,0.8,14.0,36.0,50.0,20.0,11.0,6.0,13.0,18.0
1,16,2017-10-20,7:00p,Atlanta Hawks,91,Charlotte Hornets,109,https://www.basketball-reference.com/boxscores...,,18417,,36,95,0.379,7,30,0.233,12,14,0.857,6,32,38,19,11,4,13,29,0.5,0,104.0,110.0,42.0,94.5,0.445,8.0,24.0,0.3665,12.0,14.5,0.8285,10.0,34.0,44.0,19.5,11.0,5.0,13.0,23.5
2,37,2017-10-22,3:30p,Atlanta Hawks,104,Brooklyn Nets,116,https://www.basketball-reference.com/boxscores...,,13917,,32,94,0.34,7,27,0.259,33,36,0.917,16,32,48,21,6,2,16,25,0.333333,0,104.0,112.0,38.666667,94.333333,0.41,7.666667,25.0,0.330667,19.0,21.666667,0.858,12.0,33.333333,45.333333,20.0,9.333333,4.0,14.0,24.0
3,41,2017-10-23,7:30p,Atlanta Hawks,93,Miami Heat,104,https://www.basketball-reference.com/boxscores...,,19600,,36,76,0.474,7,17,0.412,14,18,0.778,6,35,41,14,9,1,20,17,0.25,0,101.25,110.0,38.0,89.75,0.426,7.5,23.0,0.351,17.75,20.75,0.838,10.5,33.75,44.25,18.5,9.25,3.25,15.5,22.25
4,64,2017-10-26,8:00p,Atlanta Hawks,86,Chicago Bulls,91,https://www.basketball-reference.com/boxscores...,,21104,,32,81,0.395,11,28,0.393,11,13,0.846,7,33,40,22,10,5,10,23,0.2,0,98.2,106.2,36.8,88.0,0.4198,8.2,24.0,0.3594,16.4,19.2,0.8396,9.8,33.6,43.4,19.2,9.4,3.6,14.4,22.4


In [103]:
#Shift average stats so each row has the average of the games previously
full_stats_data[['Avg Pts','Avg Opp Pts','Avg FG','Avg FGA','Avg FG %',
       'Avg 3P','Avg 3PA','Avg 3P%','Avg FT','Avg FTA','Avg FT%',
       'Avg ORB','Avg DRB','Avg TRB','Avg AST','Avg STL','Avg BLK',
       'Avg TOV','Avg PF']] = full_stats_data.groupby('team_name')['Avg1 Pts','Avg1 Opp Pts','Avg1 FG','Avg1 FGA','Avg1 FG %',
       'Avg1 3P','Avg1 3PA','Avg1 3P%','Avg1 FT','Avg1 FTA','Avg1 FT%',
       'Avg1 ORB','Avg1 DRB','Avg1 TRB','Avg1 AST','Avg1 STL','Avg1 BLK',
       'Avg1 TOV','Avg1 PF'].apply(lambda grp: grp.shift(1))

In [104]:
#Deleted non-shifted columns

full_stats_data = full_stats_data.drop(['Avg1 Pts','Avg1 Opp Pts','Avg1 FG','Avg1 FGA','Avg1 FG %',
       'Avg1 3P','Avg1 3PA','Avg1 3P%','Avg1 FT','Avg1 FTA','Avg1 FT%',
       'Avg1 ORB','Avg1 DRB','Avg1 TRB','Avg1 AST','Avg1 STL','Avg1 BLK',
       'Avg1 TOV','Avg1 PF'],axis =1)

In [105]:
full_stats_data['Previous Game Data'] = (full_stats_data.groupby('team_name')['date_game']
                                            .apply(lambda grp: grp.shift(1)))

In [106]:
#Do calculation for back to back games
full_stats_data['Time between games'] = full_stats_data['date_game'] - full_stats_data['Previous Game Data']

In [107]:
full_stats_data['Win_pct'] = (full_stats_data.groupby('team_name')['Team_win_pct']
                                            .apply(lambda grp: grp.shift(1)))

In [108]:
#Replicate df to join, so each line has team average and opposing team average

rep_full_stats_data = full_stats_data

rep_full_stats_data.head()

Unnamed: 0,index,date_game,game_start_time,team_name,team_pts,opposing_team,opposing_team_pts,box_score_text,overtimes,attendance,game_remarks,FG,FGA,FG %,3P,3PA,3P%,FT,FTA,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,Team_win_pct,Home Team,Avg Pts,Avg Opp Pts,Avg FG,Avg FGA,Avg FG %,Avg 3P,Avg 3PA,Avg 3P%,Avg FT,Avg FTA,Avg FT%,Avg ORB,Avg DRB,Avg TRB,Avg AST,Avg STL,Avg BLK,Avg TOV,Avg PF,Previous Game Data,Time between games,Win_pct
0,8,2017-10-18,8:30p,Atlanta Hawks,117,Dallas Mavericks,111,https://www.basketball-reference.com/boxscores...,,19709,,48,94,0.511,9,18,0.5,12,15,0.8,14,36,50,20,11,6,13,18,1.0,0,,,,,,,,,,,,,,,,,,,,NaT,NaT,
1,16,2017-10-20,7:00p,Atlanta Hawks,91,Charlotte Hornets,109,https://www.basketball-reference.com/boxscores...,,18417,,36,95,0.379,7,30,0.233,12,14,0.857,6,32,38,19,11,4,13,29,0.5,0,117.0,111.0,48.0,94.0,0.511,9.0,18.0,0.5,12.0,15.0,0.8,14.0,36.0,50.0,20.0,11.0,6.0,13.0,18.0,2017-10-18,2 days,1.0
2,37,2017-10-22,3:30p,Atlanta Hawks,104,Brooklyn Nets,116,https://www.basketball-reference.com/boxscores...,,13917,,32,94,0.34,7,27,0.259,33,36,0.917,16,32,48,21,6,2,16,25,0.333333,0,104.0,110.0,42.0,94.5,0.445,8.0,24.0,0.3665,12.0,14.5,0.8285,10.0,34.0,44.0,19.5,11.0,5.0,13.0,23.5,2017-10-20,2 days,0.5
3,41,2017-10-23,7:30p,Atlanta Hawks,93,Miami Heat,104,https://www.basketball-reference.com/boxscores...,,19600,,36,76,0.474,7,17,0.412,14,18,0.778,6,35,41,14,9,1,20,17,0.25,0,104.0,112.0,38.666667,94.333333,0.41,7.666667,25.0,0.330667,19.0,21.666667,0.858,12.0,33.333333,45.333333,20.0,9.333333,4.0,14.0,24.0,2017-10-22,1 days,0.333333
4,64,2017-10-26,8:00p,Atlanta Hawks,86,Chicago Bulls,91,https://www.basketball-reference.com/boxscores...,,21104,,32,81,0.395,11,28,0.393,11,13,0.846,7,33,40,22,10,5,10,23,0.2,0,101.25,110.0,38.0,89.75,0.426,7.5,23.0,0.351,17.75,20.75,0.838,10.5,33.75,44.25,18.5,9.25,3.25,15.5,22.25,2017-10-23,3 days,0.25


In [109]:
pd.set_option('display.max_columns', 999)

In [110]:
newDf = pd.merge(full_stats_data, rep_full_stats_data, left_on = ['date_game','opposing_team'],right_on = ['date_game','team_name'])

newDf.head()


Unnamed: 0,index_x,date_game,game_start_time_x,team_name_x,team_pts_x,opposing_team_x,opposing_team_pts_x,box_score_text_x,overtimes_x,attendance_x,game_remarks_x,FG_x,FGA_x,FG %_x,3P_x,3PA_x,3P%_x,FT_x,FTA_x,FT%_x,ORB_x,DRB_x,TRB_x,AST_x,STL_x,BLK_x,TOV_x,PF_x,Team_win_pct_x,Home Team_x,Avg Pts_x,Avg Opp Pts_x,Avg FG_x,Avg FGA_x,Avg FG %_x,Avg 3P_x,Avg 3PA_x,Avg 3P%_x,Avg FT_x,Avg FTA_x,Avg FT%_x,Avg ORB_x,Avg DRB_x,Avg TRB_x,Avg AST_x,Avg STL_x,Avg BLK_x,Avg TOV_x,Avg PF_x,Previous Game Data_x,Time between games_x,Win_pct_x,index_y,game_start_time_y,team_name_y,team_pts_y,opposing_team_y,opposing_team_pts_y,box_score_text_y,overtimes_y,attendance_y,game_remarks_y,FG_y,FGA_y,FG %_y,3P_y,3PA_y,3P%_y,FT_y,FTA_y,FT%_y,ORB_y,DRB_y,TRB_y,AST_y,STL_y,BLK_y,TOV_y,PF_y,Team_win_pct_y,Home Team_y,Avg Pts_y,Avg Opp Pts_y,Avg FG_y,Avg FGA_y,Avg FG %_y,Avg 3P_y,Avg 3PA_y,Avg 3P%_y,Avg FT_y,Avg FTA_y,Avg FT%_y,Avg ORB_y,Avg DRB_y,Avg TRB_y,Avg AST_y,Avg STL_y,Avg BLK_y,Avg TOV_y,Avg PF_y,Previous Game Data_y,Time between games_y,Win_pct_y
0,8,2017-10-18,8:30p,Atlanta Hawks,117,Dallas Mavericks,111,https://www.basketball-reference.com/boxscores...,,19709,,48,94,0.511,9,18,0.5,12,15,0.8,14,36,50,20,11,6,13,18,1.0,0,,,,,,,,,,,,,,,,,,,,NaT,NaT,,1150,8:30p,Dallas Mavericks,111,Atlanta Hawks,117,https://www.basketball-reference.com/boxscores...,,19709,,38,86,0.442,17,45,0.378,18,21,0.857,9,32,41,27,5,4,15,19,0.0,1,,,,,,,,,,,,,,,,,,,,NaT,NaT,
1,16,2017-10-20,7:00p,Atlanta Hawks,91,Charlotte Hornets,109,https://www.basketball-reference.com/boxscores...,,18417,,36,95,0.379,7,30,0.233,12,14,0.857,6,32,38,19,11,4,13,29,0.5,0,117.0,111.0,48.0,94.0,0.511,9.0,18.0,0.5,12.0,15.0,0.8,14.0,36.0,50.0,20.0,11.0,6.0,13.0,18.0,2017-10-18,2 days,1.0,1158,7:00p,Charlotte Hornets,109,Atlanta Hawks,91,https://www.basketball-reference.com/boxscores...,,18417,,37,86,0.43,11,36,0.306,24,30,0.8,12,45,57,17,5,4,21,18,0.5,1,90.0,102.0,29.0,73.0,0.397,9.0,30.0,0.3,23.0,29.0,0.793,3.0,44.0,47.0,16.0,4.0,3.0,17.0,15.0,2017-10-18,2 days,0.0
2,37,2017-10-22,3:30p,Atlanta Hawks,104,Brooklyn Nets,116,https://www.basketball-reference.com/boxscores...,,13917,,32,94,0.34,7,27,0.259,33,36,0.917,16,32,48,21,6,2,16,25,0.333333,0,104.0,110.0,42.0,94.5,0.445,8.0,24.0,0.3665,12.0,14.5,0.8285,10.0,34.0,44.0,19.5,11.0,5.0,13.0,23.5,2017-10-20,2 days,0.5,1179,3:30p,Brooklyn Nets,116,Atlanta Hawks,104,https://www.basketball-reference.com/boxscores...,,13917,,41,87,0.471,11,27,0.407,23,34,0.676,13,38,51,27,12,8,18,27,0.666667,1,128.5,130.5,45.5,93.0,0.4895,10.5,28.5,0.3665,27.0,33.5,0.81,13.0,32.5,45.5,22.0,10.0,4.0,19.0,21.0,2017-10-20,2 days,0.5
3,41,2017-10-23,7:30p,Atlanta Hawks,93,Miami Heat,104,https://www.basketball-reference.com/boxscores...,,19600,,36,76,0.474,7,17,0.412,14,18,0.778,6,35,41,14,9,1,20,17,0.25,0,104.0,112.0,38.666667,94.333333,0.41,7.666667,25.0,0.330667,19.0,21.666667,0.858,12.0,33.333333,45.333333,20.0,9.333333,4.0,14.0,24.0,2017-10-22,1 days,0.333333,1183,7:30p,Miami Heat,104,Atlanta Hawks,93,https://www.basketball-reference.com/boxscores...,,19600,,39,85,0.459,14,37,0.378,12,18,0.667,10,31,41,21,12,6,19,19,0.666667,1,110.5,112.0,44.5,94.0,0.4765,11.0,32.0,0.3395,10.5,15.5,0.668,8.5,36.0,44.5,25.5,6.0,7.5,13.5,19.5,2017-10-21,2 days,0.5
4,64,2017-10-26,8:00p,Atlanta Hawks,86,Chicago Bulls,91,https://www.basketball-reference.com/boxscores...,,21104,,32,81,0.395,11,28,0.393,11,13,0.846,7,33,40,22,10,5,10,23,0.2,0,101.25,110.0,38.0,89.75,0.426,7.5,23.0,0.351,17.75,20.75,0.838,10.5,33.75,44.25,18.5,9.25,3.25,15.5,22.25,2017-10-23,3 days,0.25,1206,8:00p,Chicago Bulls,91,Atlanta Hawks,86,https://www.basketball-reference.com/boxscores...,,21104,,31,86,0.36,7,32,0.219,22,29,0.759,18,44,62,20,5,4,13,15,0.25,1,96.333333,107.666667,36.666667,86.333333,0.423667,11.666667,31.0,0.371333,11.333333,13.333333,0.837667,8.666667,33.333333,42.0,22.666667,4.333333,3.333333,15.666667,19.333333,2017-10-24,2 days,0.0


In [111]:
cleandNewDf2 = newDf.drop(newDf.loc[:,'index_y':'game_remarks_y'],axis = 1)

In [112]:
cleandNewDf2

Unnamed: 0,index_x,date_game,game_start_time_x,team_name_x,team_pts_x,opposing_team_x,opposing_team_pts_x,box_score_text_x,overtimes_x,attendance_x,game_remarks_x,FG_x,FGA_x,FG %_x,3P_x,3PA_x,3P%_x,FT_x,FTA_x,FT%_x,ORB_x,DRB_x,TRB_x,AST_x,STL_x,BLK_x,TOV_x,PF_x,Team_win_pct_x,Home Team_x,Avg Pts_x,Avg Opp Pts_x,Avg FG_x,Avg FGA_x,Avg FG %_x,Avg 3P_x,Avg 3PA_x,Avg 3P%_x,Avg FT_x,Avg FTA_x,Avg FT%_x,Avg ORB_x,Avg DRB_x,Avg TRB_x,Avg AST_x,Avg STL_x,Avg BLK_x,Avg TOV_x,Avg PF_x,Previous Game Data_x,Time between games_x,Win_pct_x,FG_y,FGA_y,FG %_y,3P_y,3PA_y,3P%_y,FT_y,FTA_y,FT%_y,ORB_y,DRB_y,TRB_y,AST_y,STL_y,BLK_y,TOV_y,PF_y,Team_win_pct_y,Home Team_y,Avg Pts_y,Avg Opp Pts_y,Avg FG_y,Avg FGA_y,Avg FG %_y,Avg 3P_y,Avg 3PA_y,Avg 3P%_y,Avg FT_y,Avg FTA_y,Avg FT%_y,Avg ORB_y,Avg DRB_y,Avg TRB_y,Avg AST_y,Avg STL_y,Avg BLK_y,Avg TOV_y,Avg PF_y,Previous Game Data_y,Time between games_y,Win_pct_y
0,8,2017-10-18,8:30p,Atlanta Hawks,117,Dallas Mavericks,111,https://www.basketball-reference.com/boxscores...,,19709,,48,94,.511,9,18,.500,12,15,.800,14,36,50,20,11,6,13,18,1.000000,0,,,,,,,,,,,,,,,,,,,,NaT,NaT,,38,86,.442,17,45,.378,18,21,.857,9,32,41,27,5,4,15,19,0.000000,1,,,,,,,,,,,,,,,,,,,,NaT,NaT,
1,16,2017-10-20,7:00p,Atlanta Hawks,91,Charlotte Hornets,109,https://www.basketball-reference.com/boxscores...,,18417,,36,95,.379,7,30,.233,12,14,.857,6,32,38,19,11,4,13,29,0.500000,0,117.000000,111.000000,48.000000,94.000000,0.511000,9.000000,18.000000,0.500000,12.000000,15.000000,0.800000,14.000000,36.000000,50.000000,20.000000,11.000000,6.000000,13.000000,18.000000,2017-10-18,2 days,1.000000,37,86,.430,11,36,.306,24,30,.800,12,45,57,17,5,4,21,18,0.500000,1,90.000000,102.000000,29.000000,73.000000,0.397000,9.000000,30.000000,0.300000,23.000000,29.000000,0.793000,3.000000,44.000000,47.000000,16.000000,4.000000,3.000000,17.000000,15.000000,2017-10-18,2 days,0.000000
2,37,2017-10-22,3:30p,Atlanta Hawks,104,Brooklyn Nets,116,https://www.basketball-reference.com/boxscores...,,13917,,32,94,.340,7,27,.259,33,36,.917,16,32,48,21,6,2,16,25,0.333333,0,104.000000,110.000000,42.000000,94.500000,0.445000,8.000000,24.000000,0.366500,12.000000,14.500000,0.828500,10.000000,34.000000,44.000000,19.500000,11.000000,5.000000,13.000000,23.500000,2017-10-20,2 days,0.500000,41,87,.471,11,27,.407,23,34,.676,13,38,51,27,12,8,18,27,0.666667,1,128.500000,130.500000,45.500000,93.000000,0.489500,10.500000,28.500000,0.366500,27.000000,33.500000,0.810000,13.000000,32.500000,45.500000,22.000000,10.000000,4.000000,19.000000,21.000000,2017-10-20,2 days,0.500000
3,41,2017-10-23,7:30p,Atlanta Hawks,93,Miami Heat,104,https://www.basketball-reference.com/boxscores...,,19600,,36,76,.474,7,17,.412,14,18,.778,6,35,41,14,9,1,20,17,0.250000,0,104.000000,112.000000,38.666667,94.333333,0.410000,7.666667,25.000000,0.330667,19.000000,21.666667,0.858000,12.000000,33.333333,45.333333,20.000000,9.333333,4.000000,14.000000,24.000000,2017-10-22,1 days,0.333333,39,85,.459,14,37,.378,12,18,.667,10,31,41,21,12,6,19,19,0.666667,1,110.500000,112.000000,44.500000,94.000000,0.476500,11.000000,32.000000,0.339500,10.500000,15.500000,0.668000,8.500000,36.000000,44.500000,25.500000,6.000000,7.500000,13.500000,19.500000,2017-10-21,2 days,0.500000
4,64,2017-10-26,8:00p,Atlanta Hawks,86,Chicago Bulls,91,https://www.basketball-reference.com/boxscores...,,21104,,32,81,.395,11,28,.393,11,13,.846,7,33,40,22,10,5,10,23,0.200000,0,101.250000,110.000000,38.000000,89.750000,0.426000,7.500000,23.000000,0.351000,17.750000,20.750000,0.838000,10.500000,33.750000,44.250000,18.500000,9.250000,3.250000,15.500000,22.250000,2017-10-23,3 days,0.250000,31,86,.360,7,32,.219,22,29,.759,18,44,62,20,5,4,13,15,0.250000,1,96.333333,107.666667,36.666667,86.333333,0.423667,11.666667,31.000000,0.371333,11.333333,13.333333,0.837667,8.666667,33.333333,42.000000,22.666667,4.333333,3.333333,15.666667,19.333333,2017-10-24,2 days,0.000000
5,1213,2017-10-27,7:30p,Atlanta Hawks,100,Denver Nuggets,105,https://www.basketball-reference.com/boxscores...,,16220,,37,89,.416,14,33,.424,12,12,1.000,11,32,43,23,7,4,12,21,0.166667,1,98.200000,106.200000,36.800000,88.000000,0.419800,8.200000,24.000000,0.359400,16.400000,19.200000,0.839600,9.800000,33.600000,43.400000,19.200000,9.400000,3.600000,14.400000,22.400000,2017-10-26,1 days,0.200000,42,91,.462,4,20,.200,17,23,.739,11,35,46,25,8,4,11,12,0.400000,0,97.250000,101.000000,37.500000,85.500000,0.442750,9.750000,27.750000,0.355000,12.500000,17.500000,0.725000,13.000000,34.750000,47.750000,22.750000,5.500000,2.000000,18.000000,20.500000,2017-10-25,2 days,0.250000
6,1226,2017-10-29,3:30p,Atlanta Hawks,106,Milwaukee Bucks,117,https://www.basketball-reference.com/boxscores...,,14014,,36,77,.468,13,29,.448,21,26,.808,3,28,31,27,9,2,16,21,0.142857,1,98.500000,106.000000,36.833333,88.166667,0.419167,9.166667,25.500000,0.370167,15.666667,18.000000,0.866333,10.000000,33.333333,43.333333,19.833333,9.000000,3.666667,14.000000,22.166667,2017-10-27,2 days,0.166667,44,81,.543,13,26,.500,16,21,.762,5,38,43,30,9,3,14,27,0.666667,0,102.000000,103.200000,38.600000,80.000000,0.482800,9.000000,25.000000,0.364400,15.800000,20.400000,0.769000,5.600000,32.200000,37.800000,22.000000,7.600000,4.200000,14.400000,21.800000,2017-10-26,3 days,0.600000
7,106,2017-11-01,7:00p,Atlanta Hawks,109,Philadelphia 76ers,119,https://www.basketball-reference.com/boxscores...,,20549,,41,94,.436,8,23,.348,19,27,.704,12,37,49,22,11,4,18,28,0.125000,0,99.571429,107.571429,36.714286,86.571429,0.426143,9.714286,26.000000,0.381286,16.428571,19.142857,0.858000,9.000000,32.571429,41.571429,20.857143,9.000000,3.428571,14.285714,22.000000,2017-10-29,3 days,0.142857,46,99,.465,13,35,.371,14,25,.560,15,38,53,36,10,4,16,26,0.500000,1,104.142857,108.285714,39.285714,86.571429,0.455143,11.428571,30.000000,0.379571,14.142857,20.428571,0.695714,10.142857,35.285714,45.428571,24.142857,9.000000,3.714286,17.142857,25.571429,2017-10-30,2 days,0.428571
8,1264,2017-11-03,7:30p,Atlanta Hawks,104,Houston Rockets,119,https://www.basketball-reference.com/boxscores...,,14087,,39,81,.481,11,33,.333,15,18,.833,5,36,41,16,8,8,18,16,0.111111,1,100.750000,109.000000,37.250000,87.500000,0.427375,9.500000,25.625000,0.377125,16.750000,20.125000,0.838750,9.375000,33.125000,42.500000,21.000000,9.250000,3.500000,14.750000,22.750000,2017-11-01,2 days,0.125000,42,89,.472,16,47,.340,19,23,.826,9,35,44,28,14,4,12,16,0.700000,0,105.888889,102.444444,36.555556,83.000000,0.439333,14.111111,44.888889,0.312000,18.666667,23.333333,0.800222,8.888889,35.000000,43.888889,20.888889,7.888889,4.777778,15.000000,21.111111,2017-11-01,2 days,0.666667
9,135,2017-11-05,3:00p,Atlanta Hawks,117,Cleveland Cavaliers,115,https://www.basketball-reference.com/boxscores...,,20562,,41,86,.477,11,25,.440,24,34,.706,11,33,44,24,4,6,17,20,0.200000,0,101.111111,110.111111,37.444444,86.777778,0.433333,9.666667,26.444444,0.372222,16.555556,19.888889,0.838111,8.888889,33.444444,42.333333,20.444444,9.111111,4.000000,15.111111,22.000000,2017-11-03,2 days,0.111111,42,96,.438,10,36,.278,21,23,.913,15,32,47,27,6,6,13,29,0.400000,1,107.777778,113.000000,39.222222,82.333333,0.476667,10.222222,29.888889,0.338000,19.111111,23.888889,0.803111,8.222222,33.666667,41.888889,21.333333,6.111111,4.222222,15.888889,18.777778,2017-11-03,2 days,0.444444


In [113]:
# cleandNewDf3 = cleandNewDf2.drop(cleandNewDf2.loc[:,'Avg1 Pts_y':'Avg1 PF_y'],axis = 1)

In [None]:
cleandNewDf3.head()

In [114]:
cleandNewDf4 = cleandNewDf2.drop(cleandNewDf2.loc[:,'FG_x':'PF_x'],axis = 1)

In [115]:
cleandNewDf4.head()

Unnamed: 0,index_x,date_game,game_start_time_x,team_name_x,team_pts_x,opposing_team_x,opposing_team_pts_x,box_score_text_x,overtimes_x,attendance_x,game_remarks_x,Team_win_pct_x,Home Team_x,Avg Pts_x,Avg Opp Pts_x,Avg FG_x,Avg FGA_x,Avg FG %_x,Avg 3P_x,Avg 3PA_x,Avg 3P%_x,Avg FT_x,Avg FTA_x,Avg FT%_x,Avg ORB_x,Avg DRB_x,Avg TRB_x,Avg AST_x,Avg STL_x,Avg BLK_x,Avg TOV_x,Avg PF_x,Previous Game Data_x,Time between games_x,Win_pct_x,FG_y,FGA_y,FG %_y,3P_y,3PA_y,3P%_y,FT_y,FTA_y,FT%_y,ORB_y,DRB_y,TRB_y,AST_y,STL_y,BLK_y,TOV_y,PF_y,Team_win_pct_y,Home Team_y,Avg Pts_y,Avg Opp Pts_y,Avg FG_y,Avg FGA_y,Avg FG %_y,Avg 3P_y,Avg 3PA_y,Avg 3P%_y,Avg FT_y,Avg FTA_y,Avg FT%_y,Avg ORB_y,Avg DRB_y,Avg TRB_y,Avg AST_y,Avg STL_y,Avg BLK_y,Avg TOV_y,Avg PF_y,Previous Game Data_y,Time between games_y,Win_pct_y
0,8,2017-10-18,8:30p,Atlanta Hawks,117,Dallas Mavericks,111,https://www.basketball-reference.com/boxscores...,,19709,,1.0,0,,,,,,,,,,,,,,,,,,,,NaT,NaT,,38,86,0.442,17,45,0.378,18,21,0.857,9,32,41,27,5,4,15,19,0.0,1,,,,,,,,,,,,,,,,,,,,NaT,NaT,
1,16,2017-10-20,7:00p,Atlanta Hawks,91,Charlotte Hornets,109,https://www.basketball-reference.com/boxscores...,,18417,,0.5,0,117.0,111.0,48.0,94.0,0.511,9.0,18.0,0.5,12.0,15.0,0.8,14.0,36.0,50.0,20.0,11.0,6.0,13.0,18.0,2017-10-18,2 days,1.0,37,86,0.43,11,36,0.306,24,30,0.8,12,45,57,17,5,4,21,18,0.5,1,90.0,102.0,29.0,73.0,0.397,9.0,30.0,0.3,23.0,29.0,0.793,3.0,44.0,47.0,16.0,4.0,3.0,17.0,15.0,2017-10-18,2 days,0.0
2,37,2017-10-22,3:30p,Atlanta Hawks,104,Brooklyn Nets,116,https://www.basketball-reference.com/boxscores...,,13917,,0.333333,0,104.0,110.0,42.0,94.5,0.445,8.0,24.0,0.3665,12.0,14.5,0.8285,10.0,34.0,44.0,19.5,11.0,5.0,13.0,23.5,2017-10-20,2 days,0.5,41,87,0.471,11,27,0.407,23,34,0.676,13,38,51,27,12,8,18,27,0.666667,1,128.5,130.5,45.5,93.0,0.4895,10.5,28.5,0.3665,27.0,33.5,0.81,13.0,32.5,45.5,22.0,10.0,4.0,19.0,21.0,2017-10-20,2 days,0.5
3,41,2017-10-23,7:30p,Atlanta Hawks,93,Miami Heat,104,https://www.basketball-reference.com/boxscores...,,19600,,0.25,0,104.0,112.0,38.666667,94.333333,0.41,7.666667,25.0,0.330667,19.0,21.666667,0.858,12.0,33.333333,45.333333,20.0,9.333333,4.0,14.0,24.0,2017-10-22,1 days,0.333333,39,85,0.459,14,37,0.378,12,18,0.667,10,31,41,21,12,6,19,19,0.666667,1,110.5,112.0,44.5,94.0,0.4765,11.0,32.0,0.3395,10.5,15.5,0.668,8.5,36.0,44.5,25.5,6.0,7.5,13.5,19.5,2017-10-21,2 days,0.5
4,64,2017-10-26,8:00p,Atlanta Hawks,86,Chicago Bulls,91,https://www.basketball-reference.com/boxscores...,,21104,,0.2,0,101.25,110.0,38.0,89.75,0.426,7.5,23.0,0.351,17.75,20.75,0.838,10.5,33.75,44.25,18.5,9.25,3.25,15.5,22.25,2017-10-23,3 days,0.25,31,86,0.36,7,32,0.219,22,29,0.759,18,44,62,20,5,4,13,15,0.25,1,96.333333,107.666667,36.666667,86.333333,0.423667,11.666667,31.0,0.371333,11.333333,13.333333,0.837667,8.666667,33.333333,42.0,22.666667,4.333333,3.333333,15.666667,19.333333,2017-10-24,2 days,0.0


In [116]:
cleandNewDf5 = cleandNewDf4.drop(cleandNewDf4.loc[:,'FG_y':'PF_y'],axis = 1)

In [117]:
cleandNewDf5.head()

Unnamed: 0,index_x,date_game,game_start_time_x,team_name_x,team_pts_x,opposing_team_x,opposing_team_pts_x,box_score_text_x,overtimes_x,attendance_x,game_remarks_x,Team_win_pct_x,Home Team_x,Avg Pts_x,Avg Opp Pts_x,Avg FG_x,Avg FGA_x,Avg FG %_x,Avg 3P_x,Avg 3PA_x,Avg 3P%_x,Avg FT_x,Avg FTA_x,Avg FT%_x,Avg ORB_x,Avg DRB_x,Avg TRB_x,Avg AST_x,Avg STL_x,Avg BLK_x,Avg TOV_x,Avg PF_x,Previous Game Data_x,Time between games_x,Win_pct_x,Team_win_pct_y,Home Team_y,Avg Pts_y,Avg Opp Pts_y,Avg FG_y,Avg FGA_y,Avg FG %_y,Avg 3P_y,Avg 3PA_y,Avg 3P%_y,Avg FT_y,Avg FTA_y,Avg FT%_y,Avg ORB_y,Avg DRB_y,Avg TRB_y,Avg AST_y,Avg STL_y,Avg BLK_y,Avg TOV_y,Avg PF_y,Previous Game Data_y,Time between games_y,Win_pct_y
0,8,2017-10-18,8:30p,Atlanta Hawks,117,Dallas Mavericks,111,https://www.basketball-reference.com/boxscores...,,19709,,1.0,0,,,,,,,,,,,,,,,,,,,,NaT,NaT,,0.0,1,,,,,,,,,,,,,,,,,,,,NaT,NaT,
1,16,2017-10-20,7:00p,Atlanta Hawks,91,Charlotte Hornets,109,https://www.basketball-reference.com/boxscores...,,18417,,0.5,0,117.0,111.0,48.0,94.0,0.511,9.0,18.0,0.5,12.0,15.0,0.8,14.0,36.0,50.0,20.0,11.0,6.0,13.0,18.0,2017-10-18,2 days,1.0,0.5,1,90.0,102.0,29.0,73.0,0.397,9.0,30.0,0.3,23.0,29.0,0.793,3.0,44.0,47.0,16.0,4.0,3.0,17.0,15.0,2017-10-18,2 days,0.0
2,37,2017-10-22,3:30p,Atlanta Hawks,104,Brooklyn Nets,116,https://www.basketball-reference.com/boxscores...,,13917,,0.333333,0,104.0,110.0,42.0,94.5,0.445,8.0,24.0,0.3665,12.0,14.5,0.8285,10.0,34.0,44.0,19.5,11.0,5.0,13.0,23.5,2017-10-20,2 days,0.5,0.666667,1,128.5,130.5,45.5,93.0,0.4895,10.5,28.5,0.3665,27.0,33.5,0.81,13.0,32.5,45.5,22.0,10.0,4.0,19.0,21.0,2017-10-20,2 days,0.5
3,41,2017-10-23,7:30p,Atlanta Hawks,93,Miami Heat,104,https://www.basketball-reference.com/boxscores...,,19600,,0.25,0,104.0,112.0,38.666667,94.333333,0.41,7.666667,25.0,0.330667,19.0,21.666667,0.858,12.0,33.333333,45.333333,20.0,9.333333,4.0,14.0,24.0,2017-10-22,1 days,0.333333,0.666667,1,110.5,112.0,44.5,94.0,0.4765,11.0,32.0,0.3395,10.5,15.5,0.668,8.5,36.0,44.5,25.5,6.0,7.5,13.5,19.5,2017-10-21,2 days,0.5
4,64,2017-10-26,8:00p,Atlanta Hawks,86,Chicago Bulls,91,https://www.basketball-reference.com/boxscores...,,21104,,0.2,0,101.25,110.0,38.0,89.75,0.426,7.5,23.0,0.351,17.75,20.75,0.838,10.5,33.75,44.25,18.5,9.25,3.25,15.5,22.25,2017-10-23,3 days,0.25,0.25,1,96.333333,107.666667,36.666667,86.333333,0.423667,11.666667,31.0,0.371333,11.333333,13.333333,0.837667,8.666667,33.333333,42.0,22.666667,4.333333,3.333333,15.666667,19.333333,2017-10-24,2 days,0.0


In [118]:
cleandNewDf = cleandNewDf5.drop(['index_x','overtimes_x','attendance_x'],axis =1)

In [121]:
cleandNewDf.head()

Unnamed: 0,date_game,game_start_time_x,team_name_x,team_pts_x,opposing_team_x,opposing_team_pts_x,box_score_text_x,game_remarks_x,Team_win_pct_x,Home Team_x,Avg Pts_x,Avg Opp Pts_x,Avg FG_x,Avg FGA_x,Avg FG %_x,Avg 3P_x,Avg 3PA_x,Avg 3P%_x,Avg FT_x,Avg FTA_x,Avg FT%_x,Avg ORB_x,Avg DRB_x,Avg TRB_x,Avg AST_x,Avg STL_x,Avg BLK_x,Avg TOV_x,Avg PF_x,Previous Game Data_x,Time between games_x,Win_pct_x,Team_win_pct_y,Home Team_y,Avg Pts_y,Avg Opp Pts_y,Avg FG_y,Avg FGA_y,Avg FG %_y,Avg 3P_y,Avg 3PA_y,Avg 3P%_y,Avg FT_y,Avg FTA_y,Avg FT%_y,Avg ORB_y,Avg DRB_y,Avg TRB_y,Avg AST_y,Avg STL_y,Avg BLK_y,Avg TOV_y,Avg PF_y,Previous Game Data_y,Time between games_y,Win_pct_y
0,2017-10-18,8:30p,Atlanta Hawks,117,Dallas Mavericks,111,https://www.basketball-reference.com/boxscores...,,1.0,0,,,,,,,,,,,,,,,,,,,,NaT,NaT,,0.0,1,,,,,,,,,,,,,,,,,,,,NaT,NaT,
1,2017-10-20,7:00p,Atlanta Hawks,91,Charlotte Hornets,109,https://www.basketball-reference.com/boxscores...,,0.5,0,117.0,111.0,48.0,94.0,0.511,9.0,18.0,0.5,12.0,15.0,0.8,14.0,36.0,50.0,20.0,11.0,6.0,13.0,18.0,2017-10-18,2 days,1.0,0.5,1,90.0,102.0,29.0,73.0,0.397,9.0,30.0,0.3,23.0,29.0,0.793,3.0,44.0,47.0,16.0,4.0,3.0,17.0,15.0,2017-10-18,2 days,0.0
2,2017-10-22,3:30p,Atlanta Hawks,104,Brooklyn Nets,116,https://www.basketball-reference.com/boxscores...,,0.333333,0,104.0,110.0,42.0,94.5,0.445,8.0,24.0,0.3665,12.0,14.5,0.8285,10.0,34.0,44.0,19.5,11.0,5.0,13.0,23.5,2017-10-20,2 days,0.5,0.666667,1,128.5,130.5,45.5,93.0,0.4895,10.5,28.5,0.3665,27.0,33.5,0.81,13.0,32.5,45.5,22.0,10.0,4.0,19.0,21.0,2017-10-20,2 days,0.5
3,2017-10-23,7:30p,Atlanta Hawks,93,Miami Heat,104,https://www.basketball-reference.com/boxscores...,,0.25,0,104.0,112.0,38.666667,94.333333,0.41,7.666667,25.0,0.330667,19.0,21.666667,0.858,12.0,33.333333,45.333333,20.0,9.333333,4.0,14.0,24.0,2017-10-22,1 days,0.333333,0.666667,1,110.5,112.0,44.5,94.0,0.4765,11.0,32.0,0.3395,10.5,15.5,0.668,8.5,36.0,44.5,25.5,6.0,7.5,13.5,19.5,2017-10-21,2 days,0.5
4,2017-10-26,8:00p,Atlanta Hawks,86,Chicago Bulls,91,https://www.basketball-reference.com/boxscores...,,0.2,0,101.25,110.0,38.0,89.75,0.426,7.5,23.0,0.351,17.75,20.75,0.838,10.5,33.75,44.25,18.5,9.25,3.25,15.5,22.25,2017-10-23,3 days,0.25,0.25,1,96.333333,107.666667,36.666667,86.333333,0.423667,11.666667,31.0,0.371333,11.333333,13.333333,0.837667,8.666667,33.333333,42.0,22.666667,4.333333,3.333333,15.666667,19.333333,2017-10-24,2 days,0.0


In [122]:
cleandNewDf = cleandNewDf.drop(['Team_win_pct_x','Team_win_pct_y'],axis =1)

In [128]:
cleandNewDf = cleandNewDf.drop(['Avg Opp Pts_x','Avg Opp Pts_y'],axis =1)

In [129]:
#Delete duplicates

dedupeData = cleandNewDf.drop_duplicates(subset ='box_score_text_x',keep = 'first')

dedupeData.head()

Unnamed: 0,date_game,game_start_time_x,team_name_x,team_pts_x,opposing_team_x,opposing_team_pts_x,box_score_text_x,game_remarks_x,Home Team_x,Avg Pts_x,Avg FG_x,Avg FGA_x,Avg FG %_x,Avg 3P_x,Avg 3PA_x,Avg 3P%_x,Avg FT_x,Avg FTA_x,Avg FT%_x,Avg ORB_x,Avg DRB_x,Avg TRB_x,Avg AST_x,Avg STL_x,Avg BLK_x,Avg TOV_x,Avg PF_x,Previous Game Data_x,Time between games_x,Win_pct_x,Home Team_y,Avg Pts_y,Avg FG_y,Avg FGA_y,Avg FG %_y,Avg 3P_y,Avg 3PA_y,Avg 3P%_y,Avg FT_y,Avg FTA_y,Avg FT%_y,Avg ORB_y,Avg DRB_y,Avg TRB_y,Avg AST_y,Avg STL_y,Avg BLK_y,Avg TOV_y,Avg PF_y,Previous Game Data_y,Time between games_y,Win_pct_y
0,2017-10-18,8:30p,Atlanta Hawks,117,Dallas Mavericks,111,https://www.basketball-reference.com/boxscores...,,0,,,,,,,,,,,,,,,,,,,NaT,NaT,,1,,,,,,,,,,,,,,,,,,,NaT,NaT,
1,2017-10-20,7:00p,Atlanta Hawks,91,Charlotte Hornets,109,https://www.basketball-reference.com/boxscores...,,0,117.0,48.0,94.0,0.511,9.0,18.0,0.5,12.0,15.0,0.8,14.0,36.0,50.0,20.0,11.0,6.0,13.0,18.0,2017-10-18,2 days,1.0,1,90.0,29.0,73.0,0.397,9.0,30.0,0.3,23.0,29.0,0.793,3.0,44.0,47.0,16.0,4.0,3.0,17.0,15.0,2017-10-18,2 days,0.0
2,2017-10-22,3:30p,Atlanta Hawks,104,Brooklyn Nets,116,https://www.basketball-reference.com/boxscores...,,0,104.0,42.0,94.5,0.445,8.0,24.0,0.3665,12.0,14.5,0.8285,10.0,34.0,44.0,19.5,11.0,5.0,13.0,23.5,2017-10-20,2 days,0.5,1,128.5,45.5,93.0,0.4895,10.5,28.5,0.3665,27.0,33.5,0.81,13.0,32.5,45.5,22.0,10.0,4.0,19.0,21.0,2017-10-20,2 days,0.5
3,2017-10-23,7:30p,Atlanta Hawks,93,Miami Heat,104,https://www.basketball-reference.com/boxscores...,,0,104.0,38.666667,94.333333,0.41,7.666667,25.0,0.330667,19.0,21.666667,0.858,12.0,33.333333,45.333333,20.0,9.333333,4.0,14.0,24.0,2017-10-22,1 days,0.333333,1,110.5,44.5,94.0,0.4765,11.0,32.0,0.3395,10.5,15.5,0.668,8.5,36.0,44.5,25.5,6.0,7.5,13.5,19.5,2017-10-21,2 days,0.5
4,2017-10-26,8:00p,Atlanta Hawks,86,Chicago Bulls,91,https://www.basketball-reference.com/boxscores...,,0,101.25,38.0,89.75,0.426,7.5,23.0,0.351,17.75,20.75,0.838,10.5,33.75,44.25,18.5,9.25,3.25,15.5,22.25,2017-10-23,3 days,0.25,1,96.333333,36.666667,86.333333,0.423667,11.666667,31.0,0.371333,11.333333,13.333333,0.837667,8.666667,33.333333,42.0,22.666667,4.333333,3.333333,15.666667,19.333333,2017-10-24,2 days,0.0


In [130]:
dedupeData.columns = ['date_game', 'game_start_time', 'team_name', 'team_pts',
       'opposing_team', 'opposing_team_pts', 'box_score_text',
       'game_remarks','Team Home?','Avg Team Pts', 
       'Avg Team FG', 'Avg Team FGA', 'Avg Team FG %', 'Avg Team 3P', 'Avg Team 3PA',
       'Avg Team 3P%', 'Avg Team FT', 'Avg Team FTA', 'Avg Team FT%', 'Avg Team ORB',
       'Avg Team DRB', 'Avg Team TRB', 'Avg Team AST', 'Avg Team STL', 'Avg Team BLK',
       'Avg Team TOV', 'Avg Team PF', 'Previous Game Data_x', 'Team Time between previous game',
       'Team_Win_Pct','Home Team_y', 'Avg Opp Pts',
       'Avg Opp FG', 'Avg Opp FGA', 'Avg Opp FG %', 'Avg Opp 3P', 'Avg Opp 3PA',
       'Avg Opp 3P%', 'Avg Opp FT', 'Avg Opp FTA', 'Avg Opp FT%', 'Avg Opp ORB',
       'Avg Opp DRB', 'Avg Opp TRB', 'Avg Opp AST', 'Avg Opp STL', 'Avg Opp BLK',
       'Avg Opp TOV', 'Avg Opp PF','Previous Game Data_y', 'Opp Time between previous game','Opp_Win_Pct']

In [None]:
Avg Opp Pts_x, 'Previous Game Data_x''Home Team_y', 'Avg Opp Pts_y', 'Previous Game Data_y',

In [131]:
dedupeCleanDf = dedupeData.drop(['Previous Game Data_x','Home Team_y','Previous Game Data_y'],axis = 1)

In [132]:
dedupeCleanDf.head()

Unnamed: 0,date_game,game_start_time,team_name,team_pts,opposing_team,opposing_team_pts,box_score_text,game_remarks,Team Home?,Avg Team Pts,Avg Team FG,Avg Team FGA,Avg Team FG %,Avg Team 3P,Avg Team 3PA,Avg Team 3P%,Avg Team FT,Avg Team FTA,Avg Team FT%,Avg Team ORB,Avg Team DRB,Avg Team TRB,Avg Team AST,Avg Team STL,Avg Team BLK,Avg Team TOV,Avg Team PF,Team Time between previous game,Team_Win_Pct,Avg Opp Pts,Avg Opp FG,Avg Opp FGA,Avg Opp FG %,Avg Opp 3P,Avg Opp 3PA,Avg Opp 3P%,Avg Opp FT,Avg Opp FTA,Avg Opp FT%,Avg Opp ORB,Avg Opp DRB,Avg Opp TRB,Avg Opp AST,Avg Opp STL,Avg Opp BLK,Avg Opp TOV,Avg Opp PF,Opp Time between previous game,Opp_Win_Pct
0,2017-10-18,8:30p,Atlanta Hawks,117,Dallas Mavericks,111,https://www.basketball-reference.com/boxscores...,,0,,,,,,,,,,,,,,,,,,,NaT,,,,,,,,,,,,,,,,,,,,NaT,
1,2017-10-20,7:00p,Atlanta Hawks,91,Charlotte Hornets,109,https://www.basketball-reference.com/boxscores...,,0,117.0,48.0,94.0,0.511,9.0,18.0,0.5,12.0,15.0,0.8,14.0,36.0,50.0,20.0,11.0,6.0,13.0,18.0,2 days,1.0,90.0,29.0,73.0,0.397,9.0,30.0,0.3,23.0,29.0,0.793,3.0,44.0,47.0,16.0,4.0,3.0,17.0,15.0,2 days,0.0
2,2017-10-22,3:30p,Atlanta Hawks,104,Brooklyn Nets,116,https://www.basketball-reference.com/boxscores...,,0,104.0,42.0,94.5,0.445,8.0,24.0,0.3665,12.0,14.5,0.8285,10.0,34.0,44.0,19.5,11.0,5.0,13.0,23.5,2 days,0.5,128.5,45.5,93.0,0.4895,10.5,28.5,0.3665,27.0,33.5,0.81,13.0,32.5,45.5,22.0,10.0,4.0,19.0,21.0,2 days,0.5
3,2017-10-23,7:30p,Atlanta Hawks,93,Miami Heat,104,https://www.basketball-reference.com/boxscores...,,0,104.0,38.666667,94.333333,0.41,7.666667,25.0,0.330667,19.0,21.666667,0.858,12.0,33.333333,45.333333,20.0,9.333333,4.0,14.0,24.0,1 days,0.333333,110.5,44.5,94.0,0.4765,11.0,32.0,0.3395,10.5,15.5,0.668,8.5,36.0,44.5,25.5,6.0,7.5,13.5,19.5,2 days,0.5
4,2017-10-26,8:00p,Atlanta Hawks,86,Chicago Bulls,91,https://www.basketball-reference.com/boxscores...,,0,101.25,38.0,89.75,0.426,7.5,23.0,0.351,17.75,20.75,0.838,10.5,33.75,44.25,18.5,9.25,3.25,15.5,22.25,3 days,0.25,96.333333,36.666667,86.333333,0.423667,11.666667,31.0,0.371333,11.333333,13.333333,0.837667,8.666667,33.333333,42.0,22.666667,4.333333,3.333333,15.666667,19.333333,2 days,0.0


In [133]:
#Shift date to check if back-to-back game 

def tb2b(row):
    if pd.isnull(row['Team Time between previous game']):
        return(0)
    elif row['Team Time between previous game'].days == 1:
        return(1)
    return(0)

def ob2b(row):
    if pd.isnull(row['Opp Time between previous game']):
        return(0)
    elif row['Opp Time between previous game'].days == 1:
        return(1)
    return(0)
    
dedupeCleanDf['Team B2B?'] = dedupeCleanDf.apply(tb2b,axis = 1)
dedupeCleanDf['Opp B2B?'] = dedupeCleanDf.apply(ob2b,axis = 1)

In [134]:
dedupeCleanDf

Unnamed: 0,date_game,game_start_time,team_name,team_pts,opposing_team,opposing_team_pts,box_score_text,game_remarks,Team Home?,Avg Team Pts,Avg Team FG,Avg Team FGA,Avg Team FG %,Avg Team 3P,Avg Team 3PA,Avg Team 3P%,Avg Team FT,Avg Team FTA,Avg Team FT%,Avg Team ORB,Avg Team DRB,Avg Team TRB,Avg Team AST,Avg Team STL,Avg Team BLK,Avg Team TOV,Avg Team PF,Team Time between previous game,Team_Win_Pct,Avg Opp Pts,Avg Opp FG,Avg Opp FGA,Avg Opp FG %,Avg Opp 3P,Avg Opp 3PA,Avg Opp 3P%,Avg Opp FT,Avg Opp FTA,Avg Opp FT%,Avg Opp ORB,Avg Opp DRB,Avg Opp TRB,Avg Opp AST,Avg Opp STL,Avg Opp BLK,Avg Opp TOV,Avg Opp PF,Opp Time between previous game,Opp_Win_Pct,Team B2B?,Opp B2B?
0,2017-10-18,8:30p,Atlanta Hawks,117,Dallas Mavericks,111,https://www.basketball-reference.com/boxscores...,,0,,,,,,,,,,,,,,,,,,,NaT,,,,,,,,,,,,,,,,,,,,NaT,,0,0
1,2017-10-20,7:00p,Atlanta Hawks,91,Charlotte Hornets,109,https://www.basketball-reference.com/boxscores...,,0,117.000000,48.000000,94.000000,0.511000,9.000000,18.000000,0.500000,12.000000,15.000000,0.800000,14.000000,36.000000,50.000000,20.000000,11.000000,6.000000,13.000000,18.000000,2 days,1.000000,90.000000,29.000000,73.000000,0.397000,9.000000,30.000000,0.300000,23.000000,29.000000,0.793000,3.000000,44.000000,47.000000,16.000000,4.000000,3.000000,17.000000,15.000000,2 days,0.000000,0,0
2,2017-10-22,3:30p,Atlanta Hawks,104,Brooklyn Nets,116,https://www.basketball-reference.com/boxscores...,,0,104.000000,42.000000,94.500000,0.445000,8.000000,24.000000,0.366500,12.000000,14.500000,0.828500,10.000000,34.000000,44.000000,19.500000,11.000000,5.000000,13.000000,23.500000,2 days,0.500000,128.500000,45.500000,93.000000,0.489500,10.500000,28.500000,0.366500,27.000000,33.500000,0.810000,13.000000,32.500000,45.500000,22.000000,10.000000,4.000000,19.000000,21.000000,2 days,0.500000,0,0
3,2017-10-23,7:30p,Atlanta Hawks,93,Miami Heat,104,https://www.basketball-reference.com/boxscores...,,0,104.000000,38.666667,94.333333,0.410000,7.666667,25.000000,0.330667,19.000000,21.666667,0.858000,12.000000,33.333333,45.333333,20.000000,9.333333,4.000000,14.000000,24.000000,1 days,0.333333,110.500000,44.500000,94.000000,0.476500,11.000000,32.000000,0.339500,10.500000,15.500000,0.668000,8.500000,36.000000,44.500000,25.500000,6.000000,7.500000,13.500000,19.500000,2 days,0.500000,1,0
4,2017-10-26,8:00p,Atlanta Hawks,86,Chicago Bulls,91,https://www.basketball-reference.com/boxscores...,,0,101.250000,38.000000,89.750000,0.426000,7.500000,23.000000,0.351000,17.750000,20.750000,0.838000,10.500000,33.750000,44.250000,18.500000,9.250000,3.250000,15.500000,22.250000,3 days,0.250000,96.333333,36.666667,86.333333,0.423667,11.666667,31.000000,0.371333,11.333333,13.333333,0.837667,8.666667,33.333333,42.000000,22.666667,4.333333,3.333333,15.666667,19.333333,2 days,0.000000,0,0
5,2017-10-27,7:30p,Atlanta Hawks,100,Denver Nuggets,105,https://www.basketball-reference.com/boxscores...,,1,98.200000,36.800000,88.000000,0.419800,8.200000,24.000000,0.359400,16.400000,19.200000,0.839600,9.800000,33.600000,43.400000,19.200000,9.400000,3.600000,14.400000,22.400000,1 days,0.200000,97.250000,37.500000,85.500000,0.442750,9.750000,27.750000,0.355000,12.500000,17.500000,0.725000,13.000000,34.750000,47.750000,22.750000,5.500000,2.000000,18.000000,20.500000,2 days,0.250000,1,0
6,2017-10-29,3:30p,Atlanta Hawks,106,Milwaukee Bucks,117,https://www.basketball-reference.com/boxscores...,,1,98.500000,36.833333,88.166667,0.419167,9.166667,25.500000,0.370167,15.666667,18.000000,0.866333,10.000000,33.333333,43.333333,19.833333,9.000000,3.666667,14.000000,22.166667,2 days,0.166667,102.000000,38.600000,80.000000,0.482800,9.000000,25.000000,0.364400,15.800000,20.400000,0.769000,5.600000,32.200000,37.800000,22.000000,7.600000,4.200000,14.400000,21.800000,3 days,0.600000,0,0
7,2017-11-01,7:00p,Atlanta Hawks,109,Philadelphia 76ers,119,https://www.basketball-reference.com/boxscores...,,0,99.571429,36.714286,86.571429,0.426143,9.714286,26.000000,0.381286,16.428571,19.142857,0.858000,9.000000,32.571429,41.571429,20.857143,9.000000,3.428571,14.285714,22.000000,3 days,0.142857,104.142857,39.285714,86.571429,0.455143,11.428571,30.000000,0.379571,14.142857,20.428571,0.695714,10.142857,35.285714,45.428571,24.142857,9.000000,3.714286,17.142857,25.571429,2 days,0.428571,0,0
8,2017-11-03,7:30p,Atlanta Hawks,104,Houston Rockets,119,https://www.basketball-reference.com/boxscores...,,1,100.750000,37.250000,87.500000,0.427375,9.500000,25.625000,0.377125,16.750000,20.125000,0.838750,9.375000,33.125000,42.500000,21.000000,9.250000,3.500000,14.750000,22.750000,2 days,0.125000,105.888889,36.555556,83.000000,0.439333,14.111111,44.888889,0.312000,18.666667,23.333333,0.800222,8.888889,35.000000,43.888889,20.888889,7.888889,4.777778,15.000000,21.111111,2 days,0.666667,0,0
9,2017-11-05,3:00p,Atlanta Hawks,117,Cleveland Cavaliers,115,https://www.basketball-reference.com/boxscores...,,0,101.111111,37.444444,86.777778,0.433333,9.666667,26.444444,0.372222,16.555556,19.888889,0.838111,8.888889,33.444444,42.333333,20.444444,9.111111,4.000000,15.111111,22.000000,2 days,0.111111,107.777778,39.222222,82.333333,0.476667,10.222222,29.888889,0.338000,19.111111,23.888889,0.803111,8.222222,33.666667,41.888889,21.333333,6.111111,4.222222,15.888889,18.777778,2 days,0.444444,0,0


In [136]:
#East Team, West Team

nbaDictwest = {
'Atlanta Hawks': 0,
'Brooklyn Nets': 0,
'Boston Celtics': 0,
'Charlotte Hornets': 0,
'Chicago Bulls': 0,
'Cleveland Cavaliers': 0,
'Dallas Mavericks': 1,
'Denver Nuggets': 1,
'Detroit Pistons': 0,
'Golden State Warriors': 1,
'Houston Rockets': 1,
'Indiana Pacers': 0,
'Los Angeles Clippers': 1,
'Los Angeles Lakers': 1,
'Memphis Grizzlies': 1,
'Miami Heat': 0,
'Milwaukee Bucks': 0,
'Minnesota Timberwolves': 1,
'New Orleans Pelicans': 1,
'New York Knicks': 0,
'Oklahoma City Thunder': 1,
'Orlando Magic': 0,
'Philadelphia 76ers': 0,
'Phoenix Suns': 1,
'Portland Trail Blazers': 1,
'Sacramento Kings': 1,
'San Antonio Spurs': 1,
'Toronto Raptors': 0,
'Utah Jazz': 1,
'Washington Wizards': 0}

dedupeCleanDf['Team West?'] = dedupeCleanDf.apply(lambda row: nbaDictwest[row['team_name']],axis=1)
dedupeCleanDf['Opp West?'] = dedupeCleanDf.apply(lambda row: nbaDictwest[row['opposing_team']],axis=1)

In [137]:
dedupeCleanDf

Unnamed: 0,date_game,game_start_time,team_name,team_pts,opposing_team,opposing_team_pts,box_score_text,game_remarks,Team Home?,Avg Team Pts,Avg Team FG,Avg Team FGA,Avg Team FG %,Avg Team 3P,Avg Team 3PA,Avg Team 3P%,Avg Team FT,Avg Team FTA,Avg Team FT%,Avg Team ORB,Avg Team DRB,Avg Team TRB,Avg Team AST,Avg Team STL,Avg Team BLK,Avg Team TOV,Avg Team PF,Team Time between previous game,Team_Win_Pct,Avg Opp Pts,Avg Opp FG,Avg Opp FGA,Avg Opp FG %,Avg Opp 3P,Avg Opp 3PA,Avg Opp 3P%,Avg Opp FT,Avg Opp FTA,Avg Opp FT%,Avg Opp ORB,Avg Opp DRB,Avg Opp TRB,Avg Opp AST,Avg Opp STL,Avg Opp BLK,Avg Opp TOV,Avg Opp PF,Opp Time between previous game,Opp_Win_Pct,Team B2B?,Opp B2B?,Team West?,Opp West?
0,2017-10-18,8:30p,Atlanta Hawks,117,Dallas Mavericks,111,https://www.basketball-reference.com/boxscores...,,0,,,,,,,,,,,,,,,,,,,NaT,,,,,,,,,,,,,,,,,,,,NaT,,0,0,0,1
1,2017-10-20,7:00p,Atlanta Hawks,91,Charlotte Hornets,109,https://www.basketball-reference.com/boxscores...,,0,117.000000,48.000000,94.000000,0.511000,9.000000,18.000000,0.500000,12.000000,15.000000,0.800000,14.000000,36.000000,50.000000,20.000000,11.000000,6.000000,13.000000,18.000000,2 days,1.000000,90.000000,29.000000,73.000000,0.397000,9.000000,30.000000,0.300000,23.000000,29.000000,0.793000,3.000000,44.000000,47.000000,16.000000,4.000000,3.000000,17.000000,15.000000,2 days,0.000000,0,0,0,0
2,2017-10-22,3:30p,Atlanta Hawks,104,Brooklyn Nets,116,https://www.basketball-reference.com/boxscores...,,0,104.000000,42.000000,94.500000,0.445000,8.000000,24.000000,0.366500,12.000000,14.500000,0.828500,10.000000,34.000000,44.000000,19.500000,11.000000,5.000000,13.000000,23.500000,2 days,0.500000,128.500000,45.500000,93.000000,0.489500,10.500000,28.500000,0.366500,27.000000,33.500000,0.810000,13.000000,32.500000,45.500000,22.000000,10.000000,4.000000,19.000000,21.000000,2 days,0.500000,0,0,0,0
3,2017-10-23,7:30p,Atlanta Hawks,93,Miami Heat,104,https://www.basketball-reference.com/boxscores...,,0,104.000000,38.666667,94.333333,0.410000,7.666667,25.000000,0.330667,19.000000,21.666667,0.858000,12.000000,33.333333,45.333333,20.000000,9.333333,4.000000,14.000000,24.000000,1 days,0.333333,110.500000,44.500000,94.000000,0.476500,11.000000,32.000000,0.339500,10.500000,15.500000,0.668000,8.500000,36.000000,44.500000,25.500000,6.000000,7.500000,13.500000,19.500000,2 days,0.500000,1,0,0,0
4,2017-10-26,8:00p,Atlanta Hawks,86,Chicago Bulls,91,https://www.basketball-reference.com/boxscores...,,0,101.250000,38.000000,89.750000,0.426000,7.500000,23.000000,0.351000,17.750000,20.750000,0.838000,10.500000,33.750000,44.250000,18.500000,9.250000,3.250000,15.500000,22.250000,3 days,0.250000,96.333333,36.666667,86.333333,0.423667,11.666667,31.000000,0.371333,11.333333,13.333333,0.837667,8.666667,33.333333,42.000000,22.666667,4.333333,3.333333,15.666667,19.333333,2 days,0.000000,0,0,0,0
5,2017-10-27,7:30p,Atlanta Hawks,100,Denver Nuggets,105,https://www.basketball-reference.com/boxscores...,,1,98.200000,36.800000,88.000000,0.419800,8.200000,24.000000,0.359400,16.400000,19.200000,0.839600,9.800000,33.600000,43.400000,19.200000,9.400000,3.600000,14.400000,22.400000,1 days,0.200000,97.250000,37.500000,85.500000,0.442750,9.750000,27.750000,0.355000,12.500000,17.500000,0.725000,13.000000,34.750000,47.750000,22.750000,5.500000,2.000000,18.000000,20.500000,2 days,0.250000,1,0,0,1
6,2017-10-29,3:30p,Atlanta Hawks,106,Milwaukee Bucks,117,https://www.basketball-reference.com/boxscores...,,1,98.500000,36.833333,88.166667,0.419167,9.166667,25.500000,0.370167,15.666667,18.000000,0.866333,10.000000,33.333333,43.333333,19.833333,9.000000,3.666667,14.000000,22.166667,2 days,0.166667,102.000000,38.600000,80.000000,0.482800,9.000000,25.000000,0.364400,15.800000,20.400000,0.769000,5.600000,32.200000,37.800000,22.000000,7.600000,4.200000,14.400000,21.800000,3 days,0.600000,0,0,0,0
7,2017-11-01,7:00p,Atlanta Hawks,109,Philadelphia 76ers,119,https://www.basketball-reference.com/boxscores...,,0,99.571429,36.714286,86.571429,0.426143,9.714286,26.000000,0.381286,16.428571,19.142857,0.858000,9.000000,32.571429,41.571429,20.857143,9.000000,3.428571,14.285714,22.000000,3 days,0.142857,104.142857,39.285714,86.571429,0.455143,11.428571,30.000000,0.379571,14.142857,20.428571,0.695714,10.142857,35.285714,45.428571,24.142857,9.000000,3.714286,17.142857,25.571429,2 days,0.428571,0,0,0,0
8,2017-11-03,7:30p,Atlanta Hawks,104,Houston Rockets,119,https://www.basketball-reference.com/boxscores...,,1,100.750000,37.250000,87.500000,0.427375,9.500000,25.625000,0.377125,16.750000,20.125000,0.838750,9.375000,33.125000,42.500000,21.000000,9.250000,3.500000,14.750000,22.750000,2 days,0.125000,105.888889,36.555556,83.000000,0.439333,14.111111,44.888889,0.312000,18.666667,23.333333,0.800222,8.888889,35.000000,43.888889,20.888889,7.888889,4.777778,15.000000,21.111111,2 days,0.666667,0,0,0,1
9,2017-11-05,3:00p,Atlanta Hawks,117,Cleveland Cavaliers,115,https://www.basketball-reference.com/boxscores...,,0,101.111111,37.444444,86.777778,0.433333,9.666667,26.444444,0.372222,16.555556,19.888889,0.838111,8.888889,33.444444,42.333333,20.444444,9.111111,4.000000,15.111111,22.000000,2 days,0.111111,107.777778,39.222222,82.333333,0.476667,10.222222,29.888889,0.338000,19.111111,23.888889,0.803111,8.222222,33.666667,41.888889,21.333333,6.111111,4.222222,15.888889,18.777778,2 days,0.444444,0,0,0,0


In [138]:
#Output to csv for saving purposes
export_csv = dedupeCleanDf.to_csv (r'C:\Users\jeromerufin\Desktop\Metis\nba_17_18.csv') #Don't forget to add '.csv' at the end of the path


In [2]:
pd.read

NameError: name 'dedupeCleanDf' is not defined