In [1]:
from bs4 import BeautifulSoup
import requests

from IPython.core.display import display, HTML

import pandas as pd

import re

import numpy as np

import datetime

import pickle

In [2]:
#Retrieve breakdown of games from october to end of march

urlList = [
    'https://www.basketball-reference.com/leagues/NBA_2015_games-october.html',
    'https://www.basketball-reference.com/leagues/NBA_2015_games-november.html',
    'https://www.basketball-reference.com/leagues/NBA_2015_games-december.html',
    'https://www.basketball-reference.com/leagues/NBA_2015_games-january.html',
    'https://www.basketball-reference.com/leagues/NBA_2015_games-february.html',
    'https://www.basketball-reference.com/leagues/NBA_2015_games-march.html'
]
soupList = []
for url in urlList:
    response = requests.get(url)
    page = response.text
    soup = BeautifulSoup(page, "lxml")
    soupList.append(soup)
[soup1,soup2,soup3,soup4,soup5,soup6] = soupList

In [3]:
headerData = soup1.find(class_ = 'overthrow table_container').find_all('tr')[0].find_all('th')

In [4]:
column_headers = []

#get headers
for header in headerData:
    column_headers.append(header['data-stat'])
column_headers

['date_game',
 'game_start_time',
 'visitor_team_name',
 'visitor_pts',
 'home_team_name',
 'home_pts',
 'box_score_text',
 'overtimes',
 'attendance',
 'game_remarks']

In [5]:
fullDataList = []

for soup in soupList:
    cellData = soup.find(class_ = 'overthrow table_container').find_all('tr')
    for row in range(1,len(cellData)):
        rowList = []
        for cell in cellData[row].find_all(lambda tag: tag.name == 'td' or tag.name == 'th'):
            if cell.contents == []:
                value = ''
            elif cell.contents[0].name == 'a':
                if cell.contents[0].contents[0] == 'Box Score':
                    value = 'https://www.basketball-reference.com'+ cell.contents[0]['href']
                else:
                    value = cell.contents[0].contents[0]
            else:
                value = cell.contents[0]
            rowList.append(value)
        fullDataList.append(rowList)

cleanCellData = [x for x in fullDataList if x != []]

cleanCellData

[['Tue, Oct 28, 2014',
  '8:00p',
  'Orlando Magic',
  '84',
  'New Orleans Pelicans',
  '101',
  'https://www.basketball-reference.com/boxscores/201410280NOP.html',
  '',
  '17,097',
  ''],
 ['Tue, Oct 28, 2014',
  '8:00p',
  'Dallas Mavericks',
  '100',
  'San Antonio Spurs',
  '101',
  'https://www.basketball-reference.com/boxscores/201410280SAS.html',
  '',
  '19,615',
  ''],
 ['Tue, Oct 28, 2014',
  '10:30p',
  'Houston Rockets',
  '108',
  'Los Angeles Lakers',
  '90',
  'https://www.basketball-reference.com/boxscores/201410280LAL.html',
  '',
  '18,997',
  ''],
 ['Wed, Oct 29, 2014',
  '7:00p',
  'Milwaukee Bucks',
  '106',
  'Charlotte Hornets',
  '108',
  'https://www.basketball-reference.com/boxscores/201410290CHO.html',
  'OT',
  '19,439',
  ''],
 ['Wed, Oct 29, 2014',
  '7:00p',
  'Philadelphia 76ers',
  '91',
  'Indiana Pacers',
  '103',
  'https://www.basketball-reference.com/boxscores/201410290IND.html',
  '',
  '18,165',
  ''],
 ['Wed, Oct 29, 2014',
  '7:30p',
  'Atlan

In [6]:
#Combine Header and Data to create intiial DF

df = pd.DataFrame(cleanCellData,columns = column_headers)

In [7]:
def date_change(row):
    t = datetime.datetime.strptime(row['date_game'].replace(',',''), "%a %b %d %Y")
    convert_date = t.strftime('%m/%d/%Y')
    return(convert_date)

df['Date'] = pd.to_datetime(df.apply(date_change,axis = 1))

In [8]:
nbaDict = {
'Atlanta Hawks': 'ATL',
'Brooklyn Nets': 'BRK',
'Boston Celtics': 'BOS',
'Charlotte Hornets': 'CHO',
'Chicago Bulls': 'CHI',
'Cleveland Cavaliers': 'CLE',
'Dallas Mavericks': 'DAL',
'Denver Nuggets': 'DEN',
'Detroit Pistons': 'DET',
'Golden State Warriors': 'GSW',
'Houston Rockets': 'HOU',
'Indiana Pacers': 'IND',
'Los Angeles Clippers': 'LAC',
'Los Angeles Lakers': 'LAL',
'Memphis Grizzlies': 'MEM',
'Miami Heat': 'MIA',
'Milwaukee Bucks': 'MIL',
'Minnesota Timberwolves': 'MIN',
'New Orleans Pelicans': 'NOP',
'New York Knicks': 'NYK',
'Oklahoma City Thunder': 'OKC',
'Orlando Magic': 'ORL',
'Philadelphia 76ers': 'PHI',
'Phoenix Suns': 'PHO',
'Portland Trail Blazers': 'POR',
'Sacramento Kings': 'SAC',
'San Antonio Spurs': 'SAS',
'Toronto Raptors': 'TOR',
'Utah Jazz': 'UTA',
'Washington Wizards': 'WAS'}

In [9]:
statColumns = ['vis FG','vis FGA','vis FG %','vis 3P','vis 3PA',
    'vis 3P%','vis FT','vis FTA','vis FT%','vis ORB',
    'vis DRB','vis TRB','vis AST','vis STL','vis BLK',
    'vis TOV','vis PF','home FG','home FGA','home FG %',
    'home 3P','home 3PA','home 3P%','home FT','home FTA',
    'home FT%','home ORB','home DRB','home TRB','home AST',
    'home STL','home BLK','home TOV','home PF']

In [10]:
def statRecorder(row):
    url1 = row['box_score_text']
    response1 = requests.get(url1)
    page1 = response1.text
    soup1 = BeautifulSoup(page1, "lxml")
    team_list = [row['visitor_team_name'],row['home_team_name']]
    statsList = []
    for team in team_list:
        nbaTeam = nbaDict[team]
        boxScoreLink = "box-{}-game-basic".format(nbaTeam)
        totalStats = soup1.find('table', id =boxScoreLink).find_all('tr')[-1].find_all('td')
        for cell in totalStats[1:-2]:
            statsList.append(cell.contents[0])
    return(statsList)

In [11]:
#Create new stats list incrementally and then concatenate after
stats_list = []

In [12]:
zeroBatch = df[0:1].apply(statRecorder,axis = 1)
for i in zeroBatch:
    stats_list.insert(0,i)

In [13]:
firstBatch = df[1:101].apply(statRecorder,axis = 1)
for i in firstBatch:
    stats_list.append(i)

In [14]:
secondBatch = df[101:201].apply(statRecorder,axis = 1)
for i in secondBatch:
    stats_list.append(i)

In [15]:
thirdBatch = df[201:301].apply(statRecorder,axis = 1)
for i in thirdBatch:
    stats_list.append(i)

In [16]:
fourthBatch = df[301:401].apply(statRecorder,axis = 1)
for i in fourthBatch:
    stats_list.append(i)

In [17]:
fifthBatch = df[401:501].apply(statRecorder,axis = 1)
for i in fifthBatch:
    stats_list.append(i)

In [18]:
sixthBatch = df[501:548].apply(statRecorder,axis = 1)
for i in sixthBatch:
    stats_list.append(i)

In [19]:
df[545:548]

Unnamed: 0,date_game,game_start_time,visitor_team_name,visitor_pts,home_team_name,home_pts,box_score_text,overtimes,attendance,game_remarks,Date
545,"Fri, Jan 9, 2015",10:00p,Denver Nuggets,118,Sacramento Kings,108,https://www.basketball-reference.com/boxscores...,,16029,,2015-01-09
546,"Fri, Jan 9, 2015",10:30p,Cleveland Cavaliers,94,Golden State Warriors,112,https://www.basketball-reference.com/boxscores...,,19596,,2015-01-09
547,"Fri, Jan 9, 2015",10:30p,Orlando Magic,84,Los Angeles Lakers,101,https://www.basketball-reference.com/boxscores...,,18997,,2015-01-09


In [20]:
nbaDictLower = {
'Atlanta Hawks': 'atl',
'Brooklyn Nets': 'brk',
'Boston Celtics': 'bos',
'Charlotte Hornets': 'cho',
'Chicago Bulls': 'chi',
'Cleveland Cavaliers': 'cle',
'Dallas Mavericks': 'dal',
'Denver Nuggets': 'den',
'Detroit Pistons': 'det',
'Golden State Warriors': 'gsw',
'Houston Rockets': 'hou',
'Indiana Pacers': 'ind',
'Los Angeles Clippers': 'lac',
'Los Angeles Lakers': 'lal',
'Memphis Grizzlies': 'mem',
'Miami Heat': 'mia',
'Milwaukee Bucks': 'mil',
'Minnesota Timberwolves': 'min',
'New Orleans Pelicans': 'nop',
'New York Knicks': 'nyk',
'Oklahoma City Thunder': 'okc',
'Orlando Magic': 'orl',
'Philadelphia 76ers': 'phi',
'Phoenix Suns': 'pho',
'Portland Trail Blazers': 'por',
'Sacramento Kings': 'sac',
'San Antonio Spurs': 'sas',
'Toronto Raptors': 'tor',
'Utah Jazz': 'uta',
'Washington Wizards': 'was'}

In [21]:
def statRecorder2(row):
    url1 = row['box_score_text']
    response1 = requests.get(url1)
    page1 = response1.text
    soup1 = BeautifulSoup(page1, "lxml")
    team_list = [row['visitor_team_name'],row['home_team_name']]
    statsList = []
    for team in team_list:
        nbaTeam = nbaDictLower[team]
        boxScoreLink = "box_{}_basic".format(nbaTeam)
        totalStats = soup1.find('table', id =boxScoreLink).find_all('tr')[-1].find_all('td')
        for cell in totalStats[1:-2]:
            statsList.append(cell.contents[0])
    return(statsList)

In [22]:
seventhBatch = df[548:701].apply(statRecorder,axis = 1)
for i in seventhBatch:
    stats_list.append(i)

In [23]:
eighthBatch = df[701:801].apply(statRecorder,axis = 1)
for i in eighthBatch:
    stats_list.append(i)

In [24]:
ninthBatch = df[801:901].apply(statRecorder,axis = 1)
for i in ninthBatch:
    stats_list.append(i)

In [25]:
tenthBatch = df[901:1001].apply(statRecorder,axis = 1)
for i in tenthBatch:
    stats_list.append(i)

In [26]:
eleventhBatch = df[1001:1152].apply(statRecorder,axis = 1)
for i in eleventhBatch:
    stats_list.append(i)

In [27]:
#Create new pd with stats list and stats columns
statsdf = pd.DataFrame(stats_list,columns = statColumns)
statsdf.head()

Unnamed: 0,vis FG,vis FGA,vis FG %,vis 3P,vis 3PA,vis 3P%,vis FT,vis FTA,vis FT%,vis ORB,...,home FTA,home FT%,home ORB,home DRB,home TRB,home AST,home STL,home BLK,home TOV,home PF
0,32,84,0.381,4,11,0.364,16,21,0.762,16,...,31,0.484,26,36,62,20,10,17,9,17
1,38,78,0.487,8,21,0.381,16,19,0.842,9,...,16,0.813,9,29,38,23,5,3,20,20
2,31,73,0.425,12,29,0.414,34,50,0.68,14,...,39,0.795,11,25,36,16,7,3,11,32
3,39,80,0.488,7,14,0.5,21,24,0.875,3,...,29,0.69,15,35,50,27,8,9,10,20
4,34,89,0.382,6,21,0.286,17,30,0.567,15,...,28,0.679,14,36,50,22,5,13,17,24


In [28]:
#concatenate original df with stats df
resultdf = pd.concat([df, statsdf], axis=1)
resultdf.head()

Unnamed: 0,date_game,game_start_time,visitor_team_name,visitor_pts,home_team_name,home_pts,box_score_text,overtimes,attendance,game_remarks,...,home FTA,home FT%,home ORB,home DRB,home TRB,home AST,home STL,home BLK,home TOV,home PF
0,"Tue, Oct 28, 2014",8:00p,Orlando Magic,84,New Orleans Pelicans,101,https://www.basketball-reference.com/boxscores...,,17097,,...,31,0.484,26,36,62,20,10,17,9,17
1,"Tue, Oct 28, 2014",8:00p,Dallas Mavericks,100,San Antonio Spurs,101,https://www.basketball-reference.com/boxscores...,,19615,,...,16,0.813,9,29,38,23,5,3,20,20
2,"Tue, Oct 28, 2014",10:30p,Houston Rockets,108,Los Angeles Lakers,90,https://www.basketball-reference.com/boxscores...,,18997,,...,39,0.795,11,25,36,16,7,3,11,32
3,"Wed, Oct 29, 2014",7:00p,Milwaukee Bucks,106,Charlotte Hornets,108,https://www.basketball-reference.com/boxscores...,OT,19439,,...,29,0.69,15,35,50,27,8,9,10,20
4,"Wed, Oct 29, 2014",7:00p,Philadelphia 76ers,91,Indiana Pacers,103,https://www.basketball-reference.com/boxscores...,,18165,,...,28,0.679,14,36,50,22,5,13,17,24


In [29]:
#Output to csv for saving purposes
export_csv = resultdf.to_csv (r'C:\Users\jeromerufin\Desktop\Metis\stats_data.csv') #Don't forget to add '.csv' at the end of the path

In [30]:
resultdf.head()

Unnamed: 0,date_game,game_start_time,visitor_team_name,visitor_pts,home_team_name,home_pts,box_score_text,overtimes,attendance,game_remarks,...,home FTA,home FT%,home ORB,home DRB,home TRB,home AST,home STL,home BLK,home TOV,home PF
0,"Tue, Oct 28, 2014",8:00p,Orlando Magic,84,New Orleans Pelicans,101,https://www.basketball-reference.com/boxscores...,,17097,,...,31,0.484,26,36,62,20,10,17,9,17
1,"Tue, Oct 28, 2014",8:00p,Dallas Mavericks,100,San Antonio Spurs,101,https://www.basketball-reference.com/boxscores...,,19615,,...,16,0.813,9,29,38,23,5,3,20,20
2,"Tue, Oct 28, 2014",10:30p,Houston Rockets,108,Los Angeles Lakers,90,https://www.basketball-reference.com/boxscores...,,18997,,...,39,0.795,11,25,36,16,7,3,11,32
3,"Wed, Oct 29, 2014",7:00p,Milwaukee Bucks,106,Charlotte Hornets,108,https://www.basketball-reference.com/boxscores...,OT,19439,,...,29,0.69,15,35,50,27,8,9,10,20
4,"Wed, Oct 29, 2014",7:00p,Philadelphia 76ers,91,Indiana Pacers,103,https://www.basketball-reference.com/boxscores...,,18165,,...,28,0.679,14,36,50,22,5,13,17,24


In [31]:
def win_percent(row):
    testurl = row['box_score_text']
    testresponse = requests.get(testurl)
    testpage = testresponse.text
    testsoup = BeautifulSoup(testpage,"lxml")
    wins_team = int(testsoup.findAll('div', text = re.compile("-"))[0].contents[0][0:].split('-')[0])
    losses_team = int(testsoup.findAll('div', text = re.compile("-"))[0].contents[0][0:].split('-')[1])
    win_pct_team = wins_team/(wins_team+losses_team)
    wins_opp = int(testsoup.findAll('div', text = re.compile("-"))[1].contents[0][0:].split('-')[0])
    losses_opp = int(testsoup.findAll('div', text = re.compile("-"))[1].contents[0][0:].split('-')[1])
    win_pct_opp = wins_opp/(wins_opp+losses_opp)
    win_list = [win_pct_team, win_pct_opp]
    print(win_list)
    return(win_list)



In [32]:
win_first_batch = resultdf[0:101]
win_second_batch = resultdf[101:201]
win_third_batch = resultdf[201:301]
win_fourth_batch = resultdf[301:401]
win_fifth_batch = resultdf[401:501]
win_sixth_batch = resultdf[501:601]
win_seventh_batch = resultdf[601:701]
win_eighth_batch = resultdf[701:801]
win_ninth_batch = resultdf[801:901]
win_tenth_batch = resultdf[901:1001]
win_eleventh_batch = resultdf[1001:1152]

In [33]:
win_final_list = []
for i in win_first_batch.apply(win_percent,axis=1):
    win_final_list.append(i)

[0.0, 1.0]
[0.0, 1.0]
[1.0, 0.0]
[0.0, 1.0]
[0.0, 1.0]
[0.0, 1.0]
[0.0, 1.0]
[0.0, 1.0]
[0.0, 1.0]
[1.0, 0.0]
[0.0, 1.0]
[1.0, 0.0]
[1.0, 0.0]
[0.0, 1.0]
[0.0, 1.0]
[0.5, 0.0]
[0.5, 0.0]
[0.0, 0.5]
[0.0, 0.5]
[0.0, 1.0]
[1.0, 0.5]
[0.5, 0.5]
[0.0, 0.5]
[0.5, 1.0]
[0.5, 0.5]
[1.0, 0.0]
[1.0, 0.5]
[0.6666666666666666, 0.5]
[1.0, 0.0]
[1.0, 0.0]
[0.3333333333333333, 0.6666666666666666]
[0.3333333333333333, 0.5]
[0.5, 0.0]
[0.5, 1.0]
[0.6666666666666666, 0.3333333333333333]
[0.5, 0.3333333333333333]
[0.6666666666666666, 0.3333333333333333]
[0.0, 1.0]
[0.6666666666666666, 0.6666666666666666]
[0.6666666666666666, 1.0]
[0.3333333333333333, 0.6666666666666666]
[1.0, 0.3333333333333333]
[1.0, 0.0]
[0.25, 0.6666666666666666]
[0.3333333333333333, 1.0]
[0.3333333333333333, 0.75]
[0.75, 0.3333333333333333]
[0.25, 0.75]
[0.5, 0.25]
[1.0, 0.75]
[0.75, 0.5]
[0.2, 0.75]
[0.0, 0.75]
[0.25, 0.5]
[0.3333333333333333, 0.5]
[0.75, 0.0]
[0.6, 0.4]
[0.2, 0.0]
[0.8, 0.25]
[0.5, 0.5]
[0.4, 0.25]
[0.8, 0.4]
[0.2

In [34]:
for i in win_second_batch.apply(win_percent,axis=1):
    win_final_list.append(i)

[0.5, 0.5714285714285714]
[0.25, 0.875]
[0.14285714285714285, 0.875]
[0.25, 0.5]
[0.625, 0.625]
[0.375, 0.625]
[0.5714285714285714, 0.7142857142857143]
[0.25, 0.75]
[0.3333333333333333, 0.5714285714285714]
[0.3333333333333333, 0.42857142857142855]
[0.3333333333333333, 0.625]
[0.3333333333333333, 0.2222222222222222]
[0.125, 0.5714285714285714]
[0.6666666666666666, 0.14285714285714285]
[0.5714285714285714, 0.625]
[0.875, 0.2857142857142857]
[0.5555555555555556, 0.8888888888888888]
[0.7777777777777778, 0.7777777777777778]
[0.0, 0.6666666666666666]
[0.5, 0.75]
[0.25, 0.3]
[0.4444444444444444, 0.4]
[0.5555555555555556, 0.625]
[0.5714285714285714, 0.375]
[0.4, 0.2]
[0.0, 0.8888888888888888]
[0.25, 0.625]
[0.3333333333333333, 0.3]
[0.4444444444444444, 0.5555555555555556]
[0.625, 0.1111111111111111]
[0.36363636363636365, 0.7777777777777778]
[0.5555555555555556, 0.625]
[0.36363636363636365, 0.8]
[0.36363636363636365, 0.7]
[0.3, 0.9]
[0.2222222222222222, 0.7]
[0.4444444444444444, 0.7]
[0.5555555

In [35]:
for i in win_third_batch.apply(win_percent,axis=1):
    win_final_list.append(i)

[0.26666666666666666, 0.7857142857142857]
[0.42857142857142855, 0.6666666666666666]
[0.6428571428571429, 0.3333333333333333]
[0.5833333333333334, 0.6923076923076923]
[0.8461538461538461, 0.5333333333333333]
[0.21428571428571427, 0.5333333333333333]
[0.6428571428571429, 0.5384615384615384]
[0.6, 0.5]
[0.8, 0.25]
[0.6428571428571429, 0.5]
[0.8571428571428571, 0.35294117647058826]
[0.42857142857142855, 0.0]
[0.8666666666666667, 0.5384615384615384]
[0.25, 0.6875]
[0.6428571428571429, 0.2]
[0.6, 0.8]
[0.5625, 0.23076923076923078]
[0.3125, 0.25]
[0.4, 0.7142857142857143]
[0.4666666666666667, 0.625]
[0.8666666666666667, 0.2]
[0.625, 0.3076923076923077]
[0.8666666666666667, 0.23529411764705882]
[0.5, 0.5714285714285714]
[0.5882352941176471, 0.1875]
[0.7058823529411765, 0.8125]
[0.6666666666666666, 0.75]
[0.3333333333333333, 0.4375]
[0.23529411764705882, 0.29411764705882354]
[0.5625, 0.7333333333333333]
[0.5882352941176471, 0.5]
[0.875, 0.75]
[0.2857142857142857, 0.1875]
[0.7222222222222222, 0.

In [36]:
for i in win_fourth_batch.apply(win_percent,axis=1):
    win_final_list.append(i)

[0.3684210526315789, 0.7]
[0.631578947368421, 0.42105263157894735]
[0.42857142857142855, 0.7619047619047619]
[0.9, 0.2]
[0.23809523809523808, 0.5238095238095238]
[0.5454545454545454, 0.75]
[0.7272727272727273, 0.65]
[0.8095238095238095, 0.13636363636363635]
[0.6956521739130435, 0.8095238095238095]
[0.17391304347826086, 0.5]
[0.4782608695652174, 0.38095238095238093]
[0.47619047619047616, 0.5217391304347826]
[0.7142857142857143, 0.2727272727272727]
[0.5, 0.2727272727272727]
[0.35, 0.2857142857142857]
[0.7619047619047619, 0.3181818181818182]
[0.7142857142857143, 0.375]
[0.09523809523809523, 0.7142857142857143]
[0.4, 0.6190476190476191]
[0.47619047619047616, 0.7083333333333334]
[0.7727272727272727, 0.23809523809523808]
[0.16666666666666666, 0.7272727272727273]
[0.45454545454545453, 0.45454545454545453]
[0.7619047619047619, 0.9047619047619048]
[0.6190476190476191, 0.4090909090909091]
[0.7727272727272727, 0.4782608695652174]
[0.7391304347826086, 0.6363636363636364]
[0.7272727272727273, 0.727

In [37]:
for i in win_fifth_batch.apply(win_percent,axis=1):
    win_final_list.append(i)

[0.17857142857142858, 0.4230769230769231]
[0.4, 0.4642857142857143]
[0.11538461538461539, 0.3333333333333333]
[0.2962962962962963, 0.4444444444444444]
[0.5172413793103449, 0.7307692307692307]
[0.32142857142857145, 0.19230769230769232]
[0.5185185185185185, 0.4642857142857143]
[0.42857142857142855, 0.32142857142857145]
[0.7586206896551724, 0.6666666666666666]
[0.7586206896551724, 0.7407407407407407]
[0.3103448275862069, 0.75]
[0.7407407407407407, 0.6896551724137931]
[0.6785714285714286, 0.6206896551724138]
[0.42857142857142855, 0.8846153846153846]
[0.18518518518518517, 0.6296296296296297]
[0.5, 0.3448275862068966]
[0.38461538461538464, 0.3548387096774194]
[0.6785714285714286, 0.7037037037037037]
[0.6551724137931034, 0.75]
[0.41379310344827586, 0.4444444444444444]
[0.14814814814814814, 0.4482758620689655]
[0.3448275862068966, 0.4827586206896552]
[0.7666666666666667, 0.4482758620689655]
[0.6666666666666666, 0.5333333333333333]
[0.8518518518518519, 0.32142857142857145]
[0.7142857142857143, 

In [38]:
for i in win_sixth_batch.apply(win_percent,axis=1):
    win_final_list.append(i)

[0.125, 0.6764705882352942]
[0.7142857142857143, 0.5588235294117647]
[0.4117647058823529, 0.30303030303030304]
[0.48484848484848486, 0.42857142857142855]
[0.5142857142857142, 0.1388888888888889]
[0.7058823529411765, 0.5555555555555556]
[0.37142857142857144, 0.3235294117647059]
[0.5428571428571428, 0.15151515151515152]
[0.3333333333333333, 0.34375]
[0.7222222222222222, 0.47058823529411764]
[0.6764705882352942, 0.7142857142857143]
[0.13513513513513514, 0.7352941176470589]
[0.42857142857142855, 0.15151515151515152]
[0.6764705882352942, 0.5]
[0.3888888888888889, 0.34285714285714286]
[0.3142857142857143, 0.7714285714285715]
[0.4857142857142857, 0.84375]
[0.7647058823529411, 0.6571428571428571]
[0.5675675675675675, 0.5]
[0.3235294117647059, 0.5833333333333334]
[0.4857142857142857, 0.35135135135135137]
[0.6857142857142857, 0.5277777777777778]
[0.5135135135135135, 0.14705882352941177]
[0.13157894736842105, 0.6857142857142857]
[0.7142857142857143, 0.7714285714285715]
[0.36363636363636365, 0.457

In [39]:
for i in win_seventh_batch.apply(win_percent,axis=1):
    win_final_list.append(i)

[0.6829268292682927, 0.4146341463414634]
[0.8048780487804879, 0.6428571428571429]
[0.8421052631578947, 0.6829268292682927]
[0.1794871794871795, 0.45]
[0.7317073170731707, 0.725]
[0.6585365853658537, 0.4]
[0.5, 0.65]
[0.5, 0.3409090909090909]
[0.34146341463414637, 0.6190476190476191]
[0.175, 0.40476190476190477]
[0.1951219512195122, 0.6904761904761905]
[0.38095238095238093, 0.8095238095238095]
[0.3333333333333333, 0.6666666666666666]
[0.43902439024390244, 0.8461538461538461]
[0.6904761904761905, 0.7073170731707317]
[0.3488372093023256, 0.6904761904761905]
[0.4878048780487805, 0.14285714285714285]
[0.627906976744186, 0.5238095238095238]
[0.6585365853658537, 0.5121951219512195]
[0.2857142857142857, 0.5813953488372093]
[0.3902439024390244, 0.7380952380952381]
[0.5121951219512195, 0.43902439024390244]
[0.627906976744186, 0.42857142857142855]
[0.42857142857142855, 0.4186046511627907]
[0.3333333333333333, 0.5348837209302325]
[0.16279069767441862, 0.19047619047619047]
[0.3409090909090909, 0.81

In [40]:
for i in win_eighth_batch.apply(win_percent,axis=1):
    win_final_list.append(i)

[0.6122448979591837, 0.5833333333333334]
[0.3695652173913043, 0.3469387755102041]
[0.6530612244897959, 0.3]
[0.6875, 0.6458333333333334]
[0.20833333333333334, 0.8333333333333334]
[0.6875, 0.375]
[0.48936170212765956, 0.7446808510638298]
[0.5918367346938775, 0.1702127659574468]
[0.6666666666666666, 0.5319148936170213]
[0.425531914893617, 0.3958333333333333]
[0.6875, 0.625]
[0.5714285714285714, 0.8222222222222222]
[0.44680851063829785, 0.34782608695652173]
[0.2708333333333333, 0.20833333333333334]
[0.20408163265306123, 0.6]
[0.4375, 0.6326530612244898]
[0.673469387755102, 0.40425531914893614]
[0.5416666666666666, 0.673469387755102]
[0.8163265306122449, 0.5416666666666666]
[0.29411764705882354, 0.5]
[0.16666666666666666, 0.66]
[0.75, 0.56]
[0.3877551020408163, 0.22]
[0.4375, 0.3877551020408163]
[0.3617021276595745, 0.20408163265306123]
[0.3541666666666667, 0.673469387755102]
[0.8260869565217391, 0.3617021276595745]
[0.38, 0.36]
[0.62, 0.82]
[0.38, 0.375]
[0.4166666666666667, 0.66]
[0.6, 0

In [41]:
for i in win_ninth_batch.apply(win_percent,axis=1):
    win_final_list.append(i)

[0.4, 0.2222222222222222]
[0.6851851851851852, 0.7818181818181819]
[0.6181818181818182, 0.4]
[0.4339622641509434, 0.18518518518518517]
[0.5272727272727272, 0.2222222222222222]
[0.6071428571428571, 0.6]
[0.6666666666666666, 0.6491228070175439]
[0.37037037037037035, 0.5740740740740741]
[0.6666666666666666, 0.37037037037037035]
[0.38461538461538464, 0.3584905660377358]
[0.6181818181818182, 0.8269230769230769]
[0.41509433962264153, 0.24074074074074073]
[0.5454545454545454, 0.41509433962264153]
[0.509090909090909, 0.42592592592592593]
[0.5178571428571429, 0.625]
[0.6727272727272727, 0.6727272727272727]
[0.35185185185185186, 0.6607142857142857]
[0.6140350877192983, 0.18181818181818182]
[0.5892857142857143, 0.4107142857142857]
[0.7857142857142857, 0.5636363636363636]
[0.8113207547169812, 0.4107142857142857]
[0.21818181818181817, 0.3275862068965517]
[0.36363636363636365, 0.5535714285714286]
[0.4074074074074074, 0.6551724137931034]
[0.7407407407407407, 0.6545454545454545]
[0.37735849056603776, 

In [42]:
for i in win_tenth_batch.apply(win_percent,axis=1):
    win_final_list.append(i)

[0.36065573770491804, 0.21666666666666667]
[0.38333333333333336, 0.5409836065573771]
[0.21311475409836064, 0.5573770491803278]
[0.3559322033898305, 0.6166666666666667]
[0.5245901639344263, 0.7966101694915254]
[0.6779661016949152, 0.6451612903225806]
[0.5483870967741935, 0.6290322580645161]
[0.6349206349206349, 0.6833333333333333]
[0.6129032258064516, 0.45]
[0.6190476190476191, 0.4426229508196721]
[0.35, 0.31746031746031744]
[0.4098360655737705, 0.20967741935483872]
[0.4426229508196721, 0.5645161290322581]
[0.609375, 0.8032786885245902]
[0.5238095238095238, 0.4166666666666667]
[0.3770491803278688, 0.6774193548387096]
[0.26229508196721313, 0.7213114754098361]
[0.4166666666666667, 0.532258064516129]
[0.3548387096774194, 0.6229508196721312]
[0.625, 0.8]
[0.7096774193548387, 0.5396825396825397]
[0.515625, 0.6153846153846154]
[0.3442622950819672, 0.45161290322580644]
[0.45161290322580644, 0.19672131147540983]
[0.7903225806451613, 0.2222222222222222]
[0.6721311475409836, 0.22950819672131148]


In [43]:
for i in win_eleventh_batch.apply(win_percent,axis=1):
    win_final_list.append(i)

[0.5074626865671642, 0.5522388059701493]
[0.4393939393939394, 0.6323529411764706]
[0.4090909090909091, 0.6285714285714286]
[0.35294117647058826, 0.23529411764705882]
[0.6666666666666666, 0.4626865671641791]
[0.208955223880597, 0.6029411764705882]
[0.44776119402985076, 0.5942028985507246]
[0.3, 0.6376811594202898]
[0.6268656716417911, 0.5]
[0.44776119402985076, 0.5588235294117647]
[0.6376811594202898, 0.3283582089552239]
[0.7794117647058824, 0.8059701492537313]
[0.5882352941176471, 0.44776119402985076]
[0.22058823529411764, 0.20588235294117646]
[0.37681159420289856, 0.6764705882352942]
[0.5441176470588235, 0.5217391304347826]
[0.45588235294117646, 0.2537313432835821]
[0.6567164179104478, 0.30985915492957744]
[0.2028985507246377, 0.2463768115942029]
[0.4927536231884058, 0.417910447761194]
[0.4411764705882353, 0.6338028169014085]
[0.37142857142857144, 0.47058823529411764]
[0.5942028985507246, 0.6]
[0.7681159420289855, 0.5652173913043478]
[0.6956521739130435, 0.6285714285714286]
[0.4411764

In [44]:
win_columns = ['Team_Win_Pct','Opp_Win_Pct']

win_pct_df = pd.DataFrame(win_final_list,columns = win_columns)

In [45]:
resultdf = pd.concat([resultdf, win_pct_df], axis=1)

In [46]:
resultdf

Unnamed: 0,date_game,game_start_time,visitor_team_name,visitor_pts,home_team_name,home_pts,box_score_text,overtimes,attendance,game_remarks,...,home ORB,home DRB,home TRB,home AST,home STL,home BLK,home TOV,home PF,Team_Win_Pct,Opp_Win_Pct
0,"Tue, Oct 28, 2014",8:00p,Orlando Magic,84,New Orleans Pelicans,101,https://www.basketball-reference.com/boxscores...,,17097,,...,26,36,62,20,10,17,9,17,0.000000,1.000000
1,"Tue, Oct 28, 2014",8:00p,Dallas Mavericks,100,San Antonio Spurs,101,https://www.basketball-reference.com/boxscores...,,19615,,...,9,29,38,23,5,3,20,20,0.000000,1.000000
2,"Tue, Oct 28, 2014",10:30p,Houston Rockets,108,Los Angeles Lakers,90,https://www.basketball-reference.com/boxscores...,,18997,,...,11,25,36,16,7,3,11,32,1.000000,0.000000
3,"Wed, Oct 29, 2014",7:00p,Milwaukee Bucks,106,Charlotte Hornets,108,https://www.basketball-reference.com/boxscores...,OT,19439,,...,15,35,50,27,8,9,10,20,0.000000,1.000000
4,"Wed, Oct 29, 2014",7:00p,Philadelphia 76ers,91,Indiana Pacers,103,https://www.basketball-reference.com/boxscores...,,18165,,...,14,36,50,22,5,13,17,24,0.000000,1.000000
5,"Wed, Oct 29, 2014",7:30p,Atlanta Hawks,102,Toronto Raptors,109,https://www.basketball-reference.com/boxscores...,,19800,,...,16,32,48,26,13,9,9,22,0.000000,1.000000
6,"Wed, Oct 29, 2014",7:30p,Washington Wizards,95,Miami Heat,107,https://www.basketball-reference.com/boxscores...,,19744,,...,12,32,44,20,9,3,14,17,0.000000,1.000000
7,"Wed, Oct 29, 2014",7:30p,Brooklyn Nets,105,Boston Celtics,121,https://www.basketball-reference.com/boxscores...,,18624,,...,9,26,35,28,11,5,12,25,0.000000,1.000000
8,"Wed, Oct 29, 2014",8:00p,Minnesota Timberwolves,101,Memphis Grizzlies,105,https://www.basketball-reference.com/boxscores...,,17731,,...,10,23,33,21,10,5,17,21,0.000000,1.000000
9,"Wed, Oct 29, 2014",8:00p,Chicago Bulls,104,New York Knicks,80,https://www.basketball-reference.com/boxscores...,,19812,,...,14,24,38,19,9,5,11,22,1.000000,0.000000


In [47]:
#visitor stats
visitor_df = resultdf.iloc[:,np.r_[0:28,45]]

#home stats
home_df = resultdf.iloc[:,np.r_[0:2,4:6,2:4,6:11,28:45,46]]


In [48]:
#Create bool indicator if the team is home or away
list_of_zeros = [0]*visitor_df.shape[0]
list_of_ones = [1]*home_df.shape[0]

away_bool_df = pd.DataFrame(list_of_zeros,columns = ['Home Team'])
home_bool_df = pd.DataFrame(list_of_ones,columns = ['Home Team'])

In [49]:
#Add new column for binary home or away
visitor_df = pd.concat([visitor_df, away_bool_df], axis=1)
home_df = pd.concat([home_df, home_bool_df], axis=1)

In [50]:
visitor_df.columns = ['date_game', 'game_start_time', 'team_name', 'team_pts',
       'opposing_team', 'opposing_team_pts', 'box_score_text', 'overtimes',
       'attendance', 'game_remarks', 'Date', 'FG', 'FGA', 'FG %',
       '3P', '3PA', '3P%', 'FT', 'FTA', 'FT%',
       'ORB', 'DRB', 'TRB', 'AST', 'STL', 'BLK',
       'TOV', 'PF','Team_win_pct','Home Team']


home_df.columns = ['date_game', 'game_start_time', 'team_name', 'team_pts',
       'opposing_team', 'opposing_team_pts', 'box_score_text', 'overtimes',
       'attendance', 'game_remarks', 'Date', 'FG', 'FGA', 'FG %',
       '3P', '3PA', '3P%', 'FT', 'FTA', 'FT%',
       'ORB', 'DRB', 'TRB', 'AST', 'STL', 'BLK',
       'TOV', 'PF','Team_win_pct','Home Team']

In [51]:
#Combine dfs and clean-up unnecessary columns
frames = [visitor_df, home_df]
agg_data = pd.concat(frames,ignore_index=True)
agg_data['date_game'] = agg_data['Date']
del agg_data['Date']

In [52]:
ordered_agg_data = agg_data.sort_values(by = ['team_name','date_game']).reset_index()

In [53]:
ordered_agg_data.head()

Unnamed: 0,index,date_game,game_start_time,team_name,team_pts,opposing_team,opposing_team_pts,box_score_text,overtimes,attendance,...,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,Team_win_pct,Home Team
0,5,2014-10-29,7:30p,Atlanta Hawks,102,Toronto Raptors,109,https://www.basketball-reference.com/boxscores...,,19800,...,10,32,42,26,6,8,17,24,0.0,0
1,1141,2014-11-01,7:30p,Atlanta Hawks,102,Indiana Pacers,92,https://www.basketball-reference.com/boxscores...,,19118,...,3,34,37,26,10,6,12,20,0.5,1
2,63,2014-11-05,8:30p,Atlanta Hawks,92,San Antonio Spurs,94,https://www.basketball-reference.com/boxscores...,,18581,...,10,27,37,26,14,5,13,25,0.333333,0
3,70,2014-11-07,7:00p,Atlanta Hawks,119,Charlotte Hornets,122,https://www.basketball-reference.com/boxscores...,2OT,15891,...,7,31,38,28,8,3,19,33,0.25,0
4,1193,2014-11-08,7:30p,Atlanta Hawks,103,New York Knicks,96,https://www.basketball-reference.com/boxscores...,,17521,...,12,29,41,18,10,5,8,17,0.4,1


In [54]:
rolling_stat_data = agg_data.sort_values(by = ['team_name','date_game'])

testdata_grouped_rolling = rolling_stat_data.groupby('team_name')[['team_pts','opposing_team_pts','FG', 'FGA', 'FG %',
       '3P', '3PA', '3P%', 'FT', 'FTA', 'FT%',
       'ORB', 'DRB', 'TRB', 'AST', 'STL', 'BLK',
       'TOV', 'PF']].rolling(window=30, min_periods=1).mean().reset_index()

In [55]:
del testdata_grouped_rolling['level_1']
del testdata_grouped_rolling['team_name']

In [56]:
testdata_grouped_rolling.columns = ['Avg1 Pts','Avg1 Opp Pts','Avg1 FG','Avg1 FGA','Avg1 FG %',
       'Avg1 3P','Avg1 3PA','Avg1 3P%','Avg1 FT','Avg1 FTA','Avg1 FT%',
       'Avg1 ORB','Avg1 DRB','Avg1 TRB','Avg1 AST','Avg1 STL','Avg1 BLK',
       'Avg1 TOV','Avg1 PF']

In [57]:
full_stats_data = pd.concat([ordered_agg_data ,testdata_grouped_rolling], axis=1)
full_stats_data.head(5)

Unnamed: 0,index,date_game,game_start_time,team_name,team_pts,opposing_team,opposing_team_pts,box_score_text,overtimes,attendance,...,Avg1 FTA,Avg1 FT%,Avg1 ORB,Avg1 DRB,Avg1 TRB,Avg1 AST,Avg1 STL,Avg1 BLK,Avg1 TOV,Avg1 PF
0,5,2014-10-29,7:30p,Atlanta Hawks,102,Toronto Raptors,109,https://www.basketball-reference.com/boxscores...,,19800,...,17.0,0.529,10.0,32.0,42.0,26.0,6.0,8.0,17.0,24.0
1,1141,2014-11-01,7:30p,Atlanta Hawks,102,Indiana Pacers,92,https://www.basketball-reference.com/boxscores...,,19118,...,25.0,0.6435,6.5,33.0,39.5,26.0,8.0,7.0,14.5,22.0
2,63,2014-11-05,8:30p,Atlanta Hawks,92,San Antonio Spurs,94,https://www.basketball-reference.com/boxscores...,,18581,...,20.333333,0.671333,7.666667,31.0,38.666667,26.0,10.0,6.333333,14.0,23.0
3,70,2014-11-07,7:00p,Atlanta Hawks,119,Charlotte Hornets,122,https://www.basketball-reference.com/boxscores...,2OT,15891,...,21.75,0.69575,7.5,31.0,38.5,26.5,9.5,5.5,15.25,25.5
4,1193,2014-11-08,7:30p,Atlanta Hawks,103,New York Knicks,96,https://www.basketball-reference.com/boxscores...,,17521,...,24.6,0.7122,8.4,30.6,39.0,24.8,9.6,5.4,13.8,23.8


In [58]:
#Shift average stats so each row has the average of the games previously
full_stats_data[['Avg Pts','Avg Opp Pts','Avg FG','Avg FGA','Avg FG %',
       'Avg 3P','Avg 3PA','Avg 3P%','Avg FT','Avg FTA','Avg FT%',
       'Avg ORB','Avg DRB','Avg TRB','Avg AST','Avg STL','Avg BLK',
       'Avg TOV','Avg PF']] = full_stats_data.groupby('team_name')['Avg1 Pts','Avg1 Opp Pts','Avg1 FG','Avg1 FGA','Avg1 FG %',
       'Avg1 3P','Avg1 3PA','Avg1 3P%','Avg1 FT','Avg1 FTA','Avg1 FT%',
       'Avg1 ORB','Avg1 DRB','Avg1 TRB','Avg1 AST','Avg1 STL','Avg1 BLK',
       'Avg1 TOV','Avg1 PF'].apply(lambda grp: grp.shift(1))

In [59]:
#Deleted non-shifted columns

full_stats_data = full_stats_data.drop(['Avg1 Pts','Avg1 Opp Pts','Avg1 FG','Avg1 FGA','Avg1 FG %',
       'Avg1 3P','Avg1 3PA','Avg1 3P%','Avg1 FT','Avg1 FTA','Avg1 FT%',
       'Avg1 ORB','Avg1 DRB','Avg1 TRB','Avg1 AST','Avg1 STL','Avg1 BLK',
       'Avg1 TOV','Avg1 PF'],axis =1)

In [60]:
full_stats_data['Previous Game Data'] = (full_stats_data.groupby('team_name')['date_game']
                                            .apply(lambda grp: grp.shift(1)))

In [61]:
#Do calculation for back to back games
full_stats_data['Time between games'] = full_stats_data['date_game'] - full_stats_data['Previous Game Data']

In [62]:
full_stats_data['Win_pct'] = (full_stats_data.groupby('team_name')['Team_win_pct']
                                            .apply(lambda grp: grp.shift(1)))

In [63]:
#Replicate df to join, so each line has team average and opposing team average

rep_full_stats_data = full_stats_data

rep_full_stats_data.head()

Unnamed: 0,index,date_game,game_start_time,team_name,team_pts,opposing_team,opposing_team_pts,box_score_text,overtimes,attendance,...,Avg DRB,Avg TRB,Avg AST,Avg STL,Avg BLK,Avg TOV,Avg PF,Previous Game Data,Time between games,Win_pct
0,5,2014-10-29,7:30p,Atlanta Hawks,102,Toronto Raptors,109,https://www.basketball-reference.com/boxscores...,,19800,...,,,,,,,,NaT,NaT,
1,1141,2014-11-01,7:30p,Atlanta Hawks,102,Indiana Pacers,92,https://www.basketball-reference.com/boxscores...,,19118,...,32.0,42.0,26.0,6.0,8.0,17.0,24.0,2014-10-29,3 days,0.0
2,63,2014-11-05,8:30p,Atlanta Hawks,92,San Antonio Spurs,94,https://www.basketball-reference.com/boxscores...,,18581,...,33.0,39.5,26.0,8.0,7.0,14.5,22.0,2014-11-01,4 days,0.5
3,70,2014-11-07,7:00p,Atlanta Hawks,119,Charlotte Hornets,122,https://www.basketball-reference.com/boxscores...,2OT,15891,...,31.0,38.666667,26.0,10.0,6.333333,14.0,23.0,2014-11-05,2 days,0.333333
4,1193,2014-11-08,7:30p,Atlanta Hawks,103,New York Knicks,96,https://www.basketball-reference.com/boxscores...,,17521,...,31.0,38.5,26.5,9.5,5.5,15.25,25.5,2014-11-07,1 days,0.25


In [64]:
pd.set_option('display.max_columns', 999)

In [65]:
newDf = pd.merge(full_stats_data, rep_full_stats_data, left_on = ['date_game','opposing_team'],right_on = ['date_game','team_name'])

newDf.head()


Unnamed: 0,index_x,date_game,game_start_time_x,team_name_x,team_pts_x,opposing_team_x,opposing_team_pts_x,box_score_text_x,overtimes_x,attendance_x,game_remarks_x,FG_x,FGA_x,FG %_x,3P_x,3PA_x,3P%_x,FT_x,FTA_x,FT%_x,ORB_x,DRB_x,TRB_x,AST_x,STL_x,BLK_x,TOV_x,PF_x,Team_win_pct_x,Home Team_x,Avg Pts_x,Avg Opp Pts_x,Avg FG_x,Avg FGA_x,Avg FG %_x,Avg 3P_x,Avg 3PA_x,Avg 3P%_x,Avg FT_x,Avg FTA_x,Avg FT%_x,Avg ORB_x,Avg DRB_x,Avg TRB_x,Avg AST_x,Avg STL_x,Avg BLK_x,Avg TOV_x,Avg PF_x,Previous Game Data_x,Time between games_x,Win_pct_x,index_y,game_start_time_y,team_name_y,team_pts_y,opposing_team_y,opposing_team_pts_y,box_score_text_y,overtimes_y,attendance_y,game_remarks_y,FG_y,FGA_y,FG %_y,3P_y,3PA_y,3P%_y,FT_y,FTA_y,FT%_y,ORB_y,DRB_y,TRB_y,AST_y,STL_y,BLK_y,TOV_y,PF_y,Team_win_pct_y,Home Team_y,Avg Pts_y,Avg Opp Pts_y,Avg FG_y,Avg FGA_y,Avg FG %_y,Avg 3P_y,Avg 3PA_y,Avg 3P%_y,Avg FT_y,Avg FTA_y,Avg FT%_y,Avg ORB_y,Avg DRB_y,Avg TRB_y,Avg AST_y,Avg STL_y,Avg BLK_y,Avg TOV_y,Avg PF_y,Previous Game Data_y,Time between games_y,Win_pct_y
0,5,2014-10-29,7:30p,Atlanta Hawks,102,Toronto Raptors,109,https://www.basketball-reference.com/boxscores...,,19800,,40,80,0.5,13,22,0.591,9,17,0.529,10,32,42,26,6,8,17,24,0.0,0,,,,,,,,,,,,,,,,,,,,NaT,NaT,,1115,7:30p,Toronto Raptors,109,Atlanta Hawks,102,https://www.basketball-reference.com/boxscores...,,19800,,37,90,0.411,8,26,0.308,27,33,0.818,16,32,48,26,13,9,9,22,1.0,1,,,,,,,,,,,,,,,,,,,,NaT,NaT,
1,1141,2014-11-01,7:30p,Atlanta Hawks,102,Indiana Pacers,92,https://www.basketball-reference.com/boxscores...,,19118,,35,69,0.507,7,20,0.35,25,33,0.758,3,34,37,26,10,6,12,20,0.5,1,102.0,109.0,40.0,80.0,0.5,13.0,22.0,0.591,9.0,17.0,0.529,10.0,32.0,42.0,26.0,6.0,8.0,17.0,24.0,2014-10-29,3 days,0.0,31,7:30p,Indiana Pacers,92,Atlanta Hawks,102,https://www.basketball-reference.com/boxscores...,,19118,,31,81,0.383,12,32,0.375,18,21,0.857,11,33,44,25,5,5,18,26,0.333333,0,96.0,94.0,36.0,77.5,0.464,8.0,23.0,0.3505,16.0,22.0,0.746,12.5,35.5,48.0,20.5,5.0,8.0,17.5,19.5,2014-10-31,1 days,0.5
2,63,2014-11-05,8:30p,Atlanta Hawks,92,San Antonio Spurs,94,https://www.basketball-reference.com/boxscores...,,18581,,38,92,0.413,8,25,0.32,8,11,0.727,10,27,37,26,14,5,13,25,0.333333,0,102.0,100.5,37.5,74.5,0.5035,10.0,21.0,0.4705,17.0,25.0,0.6435,6.5,33.0,39.5,26.0,8.0,7.0,14.5,22.0,2014-11-01,4 days,0.5,1173,8:30p,San Antonio Spurs,94,Atlanta Hawks,92,https://www.basketball-reference.com/boxscores...,,18581,,31,69,0.449,5,17,0.294,27,38,0.711,11,39,50,25,7,9,19,15,0.666667,1,95.0,97.0,34.5,74.0,0.4695,11.0,26.5,0.41,15.0,19.5,0.776,7.0,34.5,41.5,21.5,4.0,3.0,16.0,22.5,2014-10-31,5 days,0.5
3,70,2014-11-07,7:00p,Atlanta Hawks,119,Charlotte Hornets,122,https://www.basketball-reference.com/boxscores...,2OT,15891,,43,93,0.462,13,33,0.394,20,26,0.769,7,31,38,28,8,3,19,33,0.25,0,98.666667,98.333333,37.666667,80.333333,0.473333,9.333333,22.333333,0.420333,14.0,20.333333,0.671333,7.666667,31.0,38.666667,26.0,10.0,6.333333,14.0,23.0,2014-11-05,2 days,0.333333,1180,7:00p,Charlotte Hornets,122,Atlanta Hawks,119,https://www.basketball-reference.com/boxscores...,2OT,15891,,48,97,0.495,6,21,0.286,20,27,0.741,11,40,51,31,6,7,19,30,0.5,1,91.4,92.4,34.4,81.0,0.4256,5.4,17.8,0.3072,17.2,23.6,0.7412,9.4,33.6,43.0,22.0,7.0,5.0,13.2,18.2,2014-11-05,2 days,0.4
4,1193,2014-11-08,7:30p,Atlanta Hawks,103,New York Knicks,96,https://www.basketball-reference.com/boxscores...,,17521,,33,81,0.407,9,22,0.409,28,36,0.778,12,29,41,18,10,5,8,17,0.4,1,103.75,104.25,39.0,83.5,0.4705,10.25,25.0,0.41375,15.5,21.75,0.69575,7.5,31.0,38.5,26.5,9.5,5.5,15.25,25.5,2014-11-07,1 days,0.25,83,7:30p,New York Knicks,96,Atlanta Hawks,103,https://www.basketball-reference.com/boxscores...,,17521,,40,84,0.476,8,21,0.381,8,11,0.727,13,31,44,26,2,6,15,29,0.285714,0,91.333333,98.833333,35.5,82.166667,0.435833,7.0,16.5,0.43,13.333333,17.0,0.783667,12.0,27.5,39.5,22.5,6.333333,3.5,13.166667,24.0,2014-11-07,1 days,0.333333


In [66]:
cleandNewDf2 = newDf.drop(newDf.loc[:,'index_y':'game_remarks_y'],axis = 1)

In [67]:
# cleandNewDf3 = cleandNewDf2.drop(cleandNewDf2.loc[:,'Avg1 Pts_y':'Avg1 PF_y'],axis = 1)

In [68]:
# cleandNewDf3.head()

In [69]:
cleandNewDf4 = cleandNewDf2.drop(cleandNewDf2.loc[:,'FG_x':'PF_x'],axis = 1)

In [70]:
cleandNewDf4.head()

Unnamed: 0,index_x,date_game,game_start_time_x,team_name_x,team_pts_x,opposing_team_x,opposing_team_pts_x,box_score_text_x,overtimes_x,attendance_x,game_remarks_x,Team_win_pct_x,Home Team_x,Avg Pts_x,Avg Opp Pts_x,Avg FG_x,Avg FGA_x,Avg FG %_x,Avg 3P_x,Avg 3PA_x,Avg 3P%_x,Avg FT_x,Avg FTA_x,Avg FT%_x,Avg ORB_x,Avg DRB_x,Avg TRB_x,Avg AST_x,Avg STL_x,Avg BLK_x,Avg TOV_x,Avg PF_x,Previous Game Data_x,Time between games_x,Win_pct_x,FG_y,FGA_y,FG %_y,3P_y,3PA_y,3P%_y,FT_y,FTA_y,FT%_y,ORB_y,DRB_y,TRB_y,AST_y,STL_y,BLK_y,TOV_y,PF_y,Team_win_pct_y,Home Team_y,Avg Pts_y,Avg Opp Pts_y,Avg FG_y,Avg FGA_y,Avg FG %_y,Avg 3P_y,Avg 3PA_y,Avg 3P%_y,Avg FT_y,Avg FTA_y,Avg FT%_y,Avg ORB_y,Avg DRB_y,Avg TRB_y,Avg AST_y,Avg STL_y,Avg BLK_y,Avg TOV_y,Avg PF_y,Previous Game Data_y,Time between games_y,Win_pct_y
0,5,2014-10-29,7:30p,Atlanta Hawks,102,Toronto Raptors,109,https://www.basketball-reference.com/boxscores...,,19800,,0.0,0,,,,,,,,,,,,,,,,,,,,NaT,NaT,,37,90,0.411,8,26,0.308,27,33,0.818,16,32,48,26,13,9,9,22,1.0,1,,,,,,,,,,,,,,,,,,,,NaT,NaT,
1,1141,2014-11-01,7:30p,Atlanta Hawks,102,Indiana Pacers,92,https://www.basketball-reference.com/boxscores...,,19118,,0.5,1,102.0,109.0,40.0,80.0,0.5,13.0,22.0,0.591,9.0,17.0,0.529,10.0,32.0,42.0,26.0,6.0,8.0,17.0,24.0,2014-10-29,3 days,0.0,31,81,0.383,12,32,0.375,18,21,0.857,11,33,44,25,5,5,18,26,0.333333,0,96.0,94.0,36.0,77.5,0.464,8.0,23.0,0.3505,16.0,22.0,0.746,12.5,35.5,48.0,20.5,5.0,8.0,17.5,19.5,2014-10-31,1 days,0.5
2,63,2014-11-05,8:30p,Atlanta Hawks,92,San Antonio Spurs,94,https://www.basketball-reference.com/boxscores...,,18581,,0.333333,0,102.0,100.5,37.5,74.5,0.5035,10.0,21.0,0.4705,17.0,25.0,0.6435,6.5,33.0,39.5,26.0,8.0,7.0,14.5,22.0,2014-11-01,4 days,0.5,31,69,0.449,5,17,0.294,27,38,0.711,11,39,50,25,7,9,19,15,0.666667,1,95.0,97.0,34.5,74.0,0.4695,11.0,26.5,0.41,15.0,19.5,0.776,7.0,34.5,41.5,21.5,4.0,3.0,16.0,22.5,2014-10-31,5 days,0.5
3,70,2014-11-07,7:00p,Atlanta Hawks,119,Charlotte Hornets,122,https://www.basketball-reference.com/boxscores...,2OT,15891,,0.25,0,98.666667,98.333333,37.666667,80.333333,0.473333,9.333333,22.333333,0.420333,14.0,20.333333,0.671333,7.666667,31.0,38.666667,26.0,10.0,6.333333,14.0,23.0,2014-11-05,2 days,0.333333,48,97,0.495,6,21,0.286,20,27,0.741,11,40,51,31,6,7,19,30,0.5,1,91.4,92.4,34.4,81.0,0.4256,5.4,17.8,0.3072,17.2,23.6,0.7412,9.4,33.6,43.0,22.0,7.0,5.0,13.2,18.2,2014-11-05,2 days,0.4
4,1193,2014-11-08,7:30p,Atlanta Hawks,103,New York Knicks,96,https://www.basketball-reference.com/boxscores...,,17521,,0.4,1,103.75,104.25,39.0,83.5,0.4705,10.25,25.0,0.41375,15.5,21.75,0.69575,7.5,31.0,38.5,26.5,9.5,5.5,15.25,25.5,2014-11-07,1 days,0.25,40,84,0.476,8,21,0.381,8,11,0.727,13,31,44,26,2,6,15,29,0.285714,0,91.333333,98.833333,35.5,82.166667,0.435833,7.0,16.5,0.43,13.333333,17.0,0.783667,12.0,27.5,39.5,22.5,6.333333,3.5,13.166667,24.0,2014-11-07,1 days,0.333333


In [71]:
cleandNewDf5 = cleandNewDf4.drop(cleandNewDf4.loc[:,'FG_y':'PF_y'],axis = 1)

In [72]:
cleandNewDf = cleandNewDf5.drop(['index_x','overtimes_x','attendance_x'],axis =1)

In [73]:
cleandNewDf.head()

Unnamed: 0,date_game,game_start_time_x,team_name_x,team_pts_x,opposing_team_x,opposing_team_pts_x,box_score_text_x,game_remarks_x,Team_win_pct_x,Home Team_x,Avg Pts_x,Avg Opp Pts_x,Avg FG_x,Avg FGA_x,Avg FG %_x,Avg 3P_x,Avg 3PA_x,Avg 3P%_x,Avg FT_x,Avg FTA_x,Avg FT%_x,Avg ORB_x,Avg DRB_x,Avg TRB_x,Avg AST_x,Avg STL_x,Avg BLK_x,Avg TOV_x,Avg PF_x,Previous Game Data_x,Time between games_x,Win_pct_x,Team_win_pct_y,Home Team_y,Avg Pts_y,Avg Opp Pts_y,Avg FG_y,Avg FGA_y,Avg FG %_y,Avg 3P_y,Avg 3PA_y,Avg 3P%_y,Avg FT_y,Avg FTA_y,Avg FT%_y,Avg ORB_y,Avg DRB_y,Avg TRB_y,Avg AST_y,Avg STL_y,Avg BLK_y,Avg TOV_y,Avg PF_y,Previous Game Data_y,Time between games_y,Win_pct_y
0,2014-10-29,7:30p,Atlanta Hawks,102,Toronto Raptors,109,https://www.basketball-reference.com/boxscores...,,0.0,0,,,,,,,,,,,,,,,,,,,,NaT,NaT,,1.0,1,,,,,,,,,,,,,,,,,,,,NaT,NaT,
1,2014-11-01,7:30p,Atlanta Hawks,102,Indiana Pacers,92,https://www.basketball-reference.com/boxscores...,,0.5,1,102.0,109.0,40.0,80.0,0.5,13.0,22.0,0.591,9.0,17.0,0.529,10.0,32.0,42.0,26.0,6.0,8.0,17.0,24.0,2014-10-29,3 days,0.0,0.333333,0,96.0,94.0,36.0,77.5,0.464,8.0,23.0,0.3505,16.0,22.0,0.746,12.5,35.5,48.0,20.5,5.0,8.0,17.5,19.5,2014-10-31,1 days,0.5
2,2014-11-05,8:30p,Atlanta Hawks,92,San Antonio Spurs,94,https://www.basketball-reference.com/boxscores...,,0.333333,0,102.0,100.5,37.5,74.5,0.5035,10.0,21.0,0.4705,17.0,25.0,0.6435,6.5,33.0,39.5,26.0,8.0,7.0,14.5,22.0,2014-11-01,4 days,0.5,0.666667,1,95.0,97.0,34.5,74.0,0.4695,11.0,26.5,0.41,15.0,19.5,0.776,7.0,34.5,41.5,21.5,4.0,3.0,16.0,22.5,2014-10-31,5 days,0.5
3,2014-11-07,7:00p,Atlanta Hawks,119,Charlotte Hornets,122,https://www.basketball-reference.com/boxscores...,,0.25,0,98.666667,98.333333,37.666667,80.333333,0.473333,9.333333,22.333333,0.420333,14.0,20.333333,0.671333,7.666667,31.0,38.666667,26.0,10.0,6.333333,14.0,23.0,2014-11-05,2 days,0.333333,0.5,1,91.4,92.4,34.4,81.0,0.4256,5.4,17.8,0.3072,17.2,23.6,0.7412,9.4,33.6,43.0,22.0,7.0,5.0,13.2,18.2,2014-11-05,2 days,0.4
4,2014-11-08,7:30p,Atlanta Hawks,103,New York Knicks,96,https://www.basketball-reference.com/boxscores...,,0.4,1,103.75,104.25,39.0,83.5,0.4705,10.25,25.0,0.41375,15.5,21.75,0.69575,7.5,31.0,38.5,26.5,9.5,5.5,15.25,25.5,2014-11-07,1 days,0.25,0.285714,0,91.333333,98.833333,35.5,82.166667,0.435833,7.0,16.5,0.43,13.333333,17.0,0.783667,12.0,27.5,39.5,22.5,6.333333,3.5,13.166667,24.0,2014-11-07,1 days,0.333333


In [74]:
cleandNewDf = cleandNewDf.drop(['Team_win_pct_x','Team_win_pct_y'],axis =1)

In [75]:
cleandNewDf = cleandNewDf.drop(['Avg Opp Pts_x','Avg Opp Pts_y'],axis =1)

In [76]:
#Delete duplicates

dedupeData = cleandNewDf.drop_duplicates(subset ='box_score_text_x',keep = 'first')

dedupeData.head()

Unnamed: 0,date_game,game_start_time_x,team_name_x,team_pts_x,opposing_team_x,opposing_team_pts_x,box_score_text_x,game_remarks_x,Home Team_x,Avg Pts_x,Avg FG_x,Avg FGA_x,Avg FG %_x,Avg 3P_x,Avg 3PA_x,Avg 3P%_x,Avg FT_x,Avg FTA_x,Avg FT%_x,Avg ORB_x,Avg DRB_x,Avg TRB_x,Avg AST_x,Avg STL_x,Avg BLK_x,Avg TOV_x,Avg PF_x,Previous Game Data_x,Time between games_x,Win_pct_x,Home Team_y,Avg Pts_y,Avg FG_y,Avg FGA_y,Avg FG %_y,Avg 3P_y,Avg 3PA_y,Avg 3P%_y,Avg FT_y,Avg FTA_y,Avg FT%_y,Avg ORB_y,Avg DRB_y,Avg TRB_y,Avg AST_y,Avg STL_y,Avg BLK_y,Avg TOV_y,Avg PF_y,Previous Game Data_y,Time between games_y,Win_pct_y
0,2014-10-29,7:30p,Atlanta Hawks,102,Toronto Raptors,109,https://www.basketball-reference.com/boxscores...,,0,,,,,,,,,,,,,,,,,,,NaT,NaT,,1,,,,,,,,,,,,,,,,,,,NaT,NaT,
1,2014-11-01,7:30p,Atlanta Hawks,102,Indiana Pacers,92,https://www.basketball-reference.com/boxscores...,,1,102.0,40.0,80.0,0.5,13.0,22.0,0.591,9.0,17.0,0.529,10.0,32.0,42.0,26.0,6.0,8.0,17.0,24.0,2014-10-29,3 days,0.0,0,96.0,36.0,77.5,0.464,8.0,23.0,0.3505,16.0,22.0,0.746,12.5,35.5,48.0,20.5,5.0,8.0,17.5,19.5,2014-10-31,1 days,0.5
2,2014-11-05,8:30p,Atlanta Hawks,92,San Antonio Spurs,94,https://www.basketball-reference.com/boxscores...,,0,102.0,37.5,74.5,0.5035,10.0,21.0,0.4705,17.0,25.0,0.6435,6.5,33.0,39.5,26.0,8.0,7.0,14.5,22.0,2014-11-01,4 days,0.5,1,95.0,34.5,74.0,0.4695,11.0,26.5,0.41,15.0,19.5,0.776,7.0,34.5,41.5,21.5,4.0,3.0,16.0,22.5,2014-10-31,5 days,0.5
3,2014-11-07,7:00p,Atlanta Hawks,119,Charlotte Hornets,122,https://www.basketball-reference.com/boxscores...,,0,98.666667,37.666667,80.333333,0.473333,9.333333,22.333333,0.420333,14.0,20.333333,0.671333,7.666667,31.0,38.666667,26.0,10.0,6.333333,14.0,23.0,2014-11-05,2 days,0.333333,1,91.4,34.4,81.0,0.4256,5.4,17.8,0.3072,17.2,23.6,0.7412,9.4,33.6,43.0,22.0,7.0,5.0,13.2,18.2,2014-11-05,2 days,0.4
4,2014-11-08,7:30p,Atlanta Hawks,103,New York Knicks,96,https://www.basketball-reference.com/boxscores...,,1,103.75,39.0,83.5,0.4705,10.25,25.0,0.41375,15.5,21.75,0.69575,7.5,31.0,38.5,26.5,9.5,5.5,15.25,25.5,2014-11-07,1 days,0.25,0,91.333333,35.5,82.166667,0.435833,7.0,16.5,0.43,13.333333,17.0,0.783667,12.0,27.5,39.5,22.5,6.333333,3.5,13.166667,24.0,2014-11-07,1 days,0.333333


In [77]:
dedupeData.shape

(1110, 52)

In [78]:
dedupeData.columns = ['date_game', 'game_start_time', 'team_name', 'team_pts',
       'opposing_team', 'opposing_team_pts', 'box_score_text',
       'game_remarks','Team Home?','Avg Team Pts', 
       'Avg Team FG', 'Avg Team FGA', 'Avg Team FG %', 'Avg Team 3P', 'Avg Team 3PA',
       'Avg Team 3P%', 'Avg Team FT', 'Avg Team FTA', 'Avg Team FT%', 'Avg Team ORB',
       'Avg Team DRB', 'Avg Team TRB', 'Avg Team AST', 'Avg Team STL', 'Avg Team BLK',
       'Avg Team TOV', 'Avg Team PF', 'Previous Game Data_x', 'Team Time between previous game',
       'Team_Win_Pct','Home Team_y', 'Avg Opp Pts',
       'Avg Opp FG', 'Avg Opp FGA', 'Avg Opp FG %', 'Avg Opp 3P', 'Avg Opp 3PA',
       'Avg Opp 3P%', 'Avg Opp FT', 'Avg Opp FTA', 'Avg Opp FT%', 'Avg Opp ORB',
       'Avg Opp DRB', 'Avg Opp TRB', 'Avg Opp AST', 'Avg Opp STL', 'Avg Opp BLK',
       'Avg Opp TOV', 'Avg Opp PF','Previous Game Data_y', 'Opp Time between previous game','Opp_Win_Pct']

In [79]:
dedupeCleanDf = dedupeData.drop(['Previous Game Data_x','Home Team_y','Previous Game Data_y'],axis = 1)

In [80]:
dedupeCleanDf.head()

Unnamed: 0,date_game,game_start_time,team_name,team_pts,opposing_team,opposing_team_pts,box_score_text,game_remarks,Team Home?,Avg Team Pts,Avg Team FG,Avg Team FGA,Avg Team FG %,Avg Team 3P,Avg Team 3PA,Avg Team 3P%,Avg Team FT,Avg Team FTA,Avg Team FT%,Avg Team ORB,Avg Team DRB,Avg Team TRB,Avg Team AST,Avg Team STL,Avg Team BLK,Avg Team TOV,Avg Team PF,Team Time between previous game,Team_Win_Pct,Avg Opp Pts,Avg Opp FG,Avg Opp FGA,Avg Opp FG %,Avg Opp 3P,Avg Opp 3PA,Avg Opp 3P%,Avg Opp FT,Avg Opp FTA,Avg Opp FT%,Avg Opp ORB,Avg Opp DRB,Avg Opp TRB,Avg Opp AST,Avg Opp STL,Avg Opp BLK,Avg Opp TOV,Avg Opp PF,Opp Time between previous game,Opp_Win_Pct
0,2014-10-29,7:30p,Atlanta Hawks,102,Toronto Raptors,109,https://www.basketball-reference.com/boxscores...,,0,,,,,,,,,,,,,,,,,,,NaT,,,,,,,,,,,,,,,,,,,,NaT,
1,2014-11-01,7:30p,Atlanta Hawks,102,Indiana Pacers,92,https://www.basketball-reference.com/boxscores...,,1,102.0,40.0,80.0,0.5,13.0,22.0,0.591,9.0,17.0,0.529,10.0,32.0,42.0,26.0,6.0,8.0,17.0,24.0,3 days,0.0,96.0,36.0,77.5,0.464,8.0,23.0,0.3505,16.0,22.0,0.746,12.5,35.5,48.0,20.5,5.0,8.0,17.5,19.5,1 days,0.5
2,2014-11-05,8:30p,Atlanta Hawks,92,San Antonio Spurs,94,https://www.basketball-reference.com/boxscores...,,0,102.0,37.5,74.5,0.5035,10.0,21.0,0.4705,17.0,25.0,0.6435,6.5,33.0,39.5,26.0,8.0,7.0,14.5,22.0,4 days,0.5,95.0,34.5,74.0,0.4695,11.0,26.5,0.41,15.0,19.5,0.776,7.0,34.5,41.5,21.5,4.0,3.0,16.0,22.5,5 days,0.5
3,2014-11-07,7:00p,Atlanta Hawks,119,Charlotte Hornets,122,https://www.basketball-reference.com/boxscores...,,0,98.666667,37.666667,80.333333,0.473333,9.333333,22.333333,0.420333,14.0,20.333333,0.671333,7.666667,31.0,38.666667,26.0,10.0,6.333333,14.0,23.0,2 days,0.333333,91.4,34.4,81.0,0.4256,5.4,17.8,0.3072,17.2,23.6,0.7412,9.4,33.6,43.0,22.0,7.0,5.0,13.2,18.2,2 days,0.4
4,2014-11-08,7:30p,Atlanta Hawks,103,New York Knicks,96,https://www.basketball-reference.com/boxscores...,,1,103.75,39.0,83.5,0.4705,10.25,25.0,0.41375,15.5,21.75,0.69575,7.5,31.0,38.5,26.5,9.5,5.5,15.25,25.5,1 days,0.25,91.333333,35.5,82.166667,0.435833,7.0,16.5,0.43,13.333333,17.0,0.783667,12.0,27.5,39.5,22.5,6.333333,3.5,13.166667,24.0,1 days,0.333333


In [81]:
#Shift date to check if back-to-back game 

def tb2b(row):
    if pd.isnull(row['Team Time between previous game']):
        return(0)
    elif row['Team Time between previous game'].days == 1:
        return(1)
    return(0)

def ob2b(row):
    if pd.isnull(row['Opp Time between previous game']):
        return(0)
    elif row['Opp Time between previous game'].days == 1:
        return(1)
    return(0)
    
dedupeCleanDf['Team B2B?'] = dedupeCleanDf.apply(tb2b,axis = 1)
dedupeCleanDf['Opp B2B?'] = dedupeCleanDf.apply(ob2b,axis = 1)

In [82]:
dedupeCleanDf

Unnamed: 0,date_game,game_start_time,team_name,team_pts,opposing_team,opposing_team_pts,box_score_text,game_remarks,Team Home?,Avg Team Pts,Avg Team FG,Avg Team FGA,Avg Team FG %,Avg Team 3P,Avg Team 3PA,Avg Team 3P%,Avg Team FT,Avg Team FTA,Avg Team FT%,Avg Team ORB,Avg Team DRB,Avg Team TRB,Avg Team AST,Avg Team STL,Avg Team BLK,Avg Team TOV,Avg Team PF,Team Time between previous game,Team_Win_Pct,Avg Opp Pts,Avg Opp FG,Avg Opp FGA,Avg Opp FG %,Avg Opp 3P,Avg Opp 3PA,Avg Opp 3P%,Avg Opp FT,Avg Opp FTA,Avg Opp FT%,Avg Opp ORB,Avg Opp DRB,Avg Opp TRB,Avg Opp AST,Avg Opp STL,Avg Opp BLK,Avg Opp TOV,Avg Opp PF,Opp Time between previous game,Opp_Win_Pct,Team B2B?,Opp B2B?
0,2014-10-29,7:30p,Atlanta Hawks,102,Toronto Raptors,109,https://www.basketball-reference.com/boxscores...,,0,,,,,,,,,,,,,,,,,,,NaT,,,,,,,,,,,,,,,,,,,,NaT,,0,0
1,2014-11-01,7:30p,Atlanta Hawks,102,Indiana Pacers,92,https://www.basketball-reference.com/boxscores...,,1,102.000000,40.000000,80.000000,0.500000,13.000000,22.000000,0.591000,9.000000,17.000000,0.529000,10.000000,32.000000,42.000000,26.000000,6.000000,8.000000,17.000000,24.000000,3 days,0.000000,96.000000,36.000000,77.500000,0.464000,8.000000,23.000000,0.350500,16.000000,22.000000,0.746000,12.500000,35.500000,48.000000,20.500000,5.000000,8.000000,17.500000,19.500000,1 days,0.500000,0,1
2,2014-11-05,8:30p,Atlanta Hawks,92,San Antonio Spurs,94,https://www.basketball-reference.com/boxscores...,,0,102.000000,37.500000,74.500000,0.503500,10.000000,21.000000,0.470500,17.000000,25.000000,0.643500,6.500000,33.000000,39.500000,26.000000,8.000000,7.000000,14.500000,22.000000,4 days,0.500000,95.000000,34.500000,74.000000,0.469500,11.000000,26.500000,0.410000,15.000000,19.500000,0.776000,7.000000,34.500000,41.500000,21.500000,4.000000,3.000000,16.000000,22.500000,5 days,0.500000,0,0
3,2014-11-07,7:00p,Atlanta Hawks,119,Charlotte Hornets,122,https://www.basketball-reference.com/boxscores...,,0,98.666667,37.666667,80.333333,0.473333,9.333333,22.333333,0.420333,14.000000,20.333333,0.671333,7.666667,31.000000,38.666667,26.000000,10.000000,6.333333,14.000000,23.000000,2 days,0.333333,91.400000,34.400000,81.000000,0.425600,5.400000,17.800000,0.307200,17.200000,23.600000,0.741200,9.400000,33.600000,43.000000,22.000000,7.000000,5.000000,13.200000,18.200000,2 days,0.400000,0,0
4,2014-11-08,7:30p,Atlanta Hawks,103,New York Knicks,96,https://www.basketball-reference.com/boxscores...,,1,103.750000,39.000000,83.500000,0.470500,10.250000,25.000000,0.413750,15.500000,21.750000,0.695750,7.500000,31.000000,38.500000,26.500000,9.500000,5.500000,15.250000,25.500000,1 days,0.250000,91.333333,35.500000,82.166667,0.435833,7.000000,16.500000,0.430000,13.333333,17.000000,0.783667,12.000000,27.500000,39.500000,22.500000,6.333333,3.500000,13.166667,24.000000,1 days,0.333333,1,1
5,2014-11-10,7:30p,Atlanta Hawks,91,New York Knicks,85,https://www.basketball-reference.com/boxscores...,,0,103.600000,37.800000,83.000000,0.457800,10.000000,24.400000,0.412800,18.000000,24.600000,0.712200,8.400000,30.600000,39.000000,24.800000,9.600000,5.400000,13.800000,23.800000,2 days,0.400000,92.000000,36.142857,82.428571,0.441571,7.142857,17.142857,0.423000,12.571429,16.142857,0.775571,12.142857,28.000000,40.142857,23.000000,5.714286,3.857143,13.428571,24.714286,2 days,0.285714,0,0
6,2014-11-12,7:30p,Atlanta Hawks,100,Utah Jazz,97,https://www.basketball-reference.com/boxscores...,,1,101.500000,36.000000,81.000000,0.444833,10.000000,24.833333,0.405667,19.500000,25.166667,0.754167,8.500000,30.333333,38.833333,24.000000,9.166667,5.000000,14.000000,22.500000,2 days,0.500000,97.625000,35.625000,77.500000,0.458750,7.750000,23.875000,0.318250,18.625000,23.625000,0.795125,10.750000,30.625000,41.375000,20.500000,5.875000,4.500000,14.250000,18.875000,2 days,0.375000,0,0
7,2014-11-14,7:30p,Atlanta Hawks,114,Miami Heat,103,https://www.basketball-reference.com/boxscores...,,1,101.285714,36.428571,80.285714,0.454571,9.857143,24.142857,0.412000,18.571429,24.142857,0.749571,9.142857,30.714286,39.857143,23.857143,9.000000,4.857143,14.571429,21.000000,2 days,0.571429,98.750000,36.000000,75.875000,0.473625,8.625000,22.750000,0.379125,18.125000,25.375000,0.707375,8.625000,28.625000,37.250000,23.125000,8.750000,3.125000,13.125000,21.500000,2 days,0.625000,0,0
8,2014-11-15,7:30p,Atlanta Hawks,94,Cleveland Cavaliers,127,https://www.basketball-reference.com/boxscores...,,0,102.875000,37.125000,79.625000,0.467750,10.000000,24.625000,0.409625,18.625000,24.000000,0.759125,8.375000,31.000000,39.375000,25.000000,9.125000,4.875000,14.375000,20.875000,1 days,0.625000,105.142857,36.714286,83.000000,0.444143,7.714286,21.285714,0.352714,24.000000,29.571429,0.811714,11.000000,29.428571,40.428571,19.571429,5.571429,3.714286,11.714286,17.428571,1 days,0.571429,1,1
9,2014-11-18,7:30p,Atlanta Hawks,109,Los Angeles Lakers,114,https://www.basketball-reference.com/boxscores...,,1,101.888889,37.444444,80.777778,0.465111,9.222222,24.333333,0.379222,17.777778,22.777778,0.768778,8.666667,30.444444,39.111111,25.111111,8.777778,5.222222,14.777778,19.888889,3 days,0.555556,101.600000,36.800000,85.000000,0.432700,4.900000,16.100000,0.297800,23.100000,30.300000,0.762100,13.200000,27.600000,40.800000,19.600000,7.600000,4.100000,12.500000,23.600000,2 days,0.100000,0,0


In [83]:
#East Team, West Team

nbaDictwest = {
'Atlanta Hawks': 0,
'Brooklyn Nets': 0,
'Boston Celtics': 0,
'Charlotte Hornets': 0,
'Chicago Bulls': 0,
'Cleveland Cavaliers': 0,
'Dallas Mavericks': 1,
'Denver Nuggets': 1,
'Detroit Pistons': 0,
'Golden State Warriors': 1,
'Houston Rockets': 1,
'Indiana Pacers': 0,
'Los Angeles Clippers': 1,
'Los Angeles Lakers': 1,
'Memphis Grizzlies': 1,
'Miami Heat': 0,
'Milwaukee Bucks': 0,
'Minnesota Timberwolves': 1,
'New Orleans Pelicans': 1,
'New York Knicks': 0,
'Oklahoma City Thunder': 1,
'Orlando Magic': 0,
'Philadelphia 76ers': 0,
'Phoenix Suns': 1,
'Portland Trail Blazers': 1,
'Sacramento Kings': 1,
'San Antonio Spurs': 1,
'Toronto Raptors': 0,
'Utah Jazz': 0,
'Washington Wizards': 0}

dedupeCleanDf['Team West?'] = dedupeCleanDf.apply(lambda row: nbaDictwest[row['team_name']],axis=1)
dedupeCleanDf['Opp West?'] = dedupeCleanDf.apply(lambda row: nbaDictwest[row['opposing_team']],axis=1)

In [84]:
dedupeCleanDf

Unnamed: 0,date_game,game_start_time,team_name,team_pts,opposing_team,opposing_team_pts,box_score_text,game_remarks,Team Home?,Avg Team Pts,Avg Team FG,Avg Team FGA,Avg Team FG %,Avg Team 3P,Avg Team 3PA,Avg Team 3P%,Avg Team FT,Avg Team FTA,Avg Team FT%,Avg Team ORB,Avg Team DRB,Avg Team TRB,Avg Team AST,Avg Team STL,Avg Team BLK,Avg Team TOV,Avg Team PF,Team Time between previous game,Team_Win_Pct,Avg Opp Pts,Avg Opp FG,Avg Opp FGA,Avg Opp FG %,Avg Opp 3P,Avg Opp 3PA,Avg Opp 3P%,Avg Opp FT,Avg Opp FTA,Avg Opp FT%,Avg Opp ORB,Avg Opp DRB,Avg Opp TRB,Avg Opp AST,Avg Opp STL,Avg Opp BLK,Avg Opp TOV,Avg Opp PF,Opp Time between previous game,Opp_Win_Pct,Team B2B?,Opp B2B?,Team West?,Opp West?
0,2014-10-29,7:30p,Atlanta Hawks,102,Toronto Raptors,109,https://www.basketball-reference.com/boxscores...,,0,,,,,,,,,,,,,,,,,,,NaT,,,,,,,,,,,,,,,,,,,,NaT,,0,0,0,0
1,2014-11-01,7:30p,Atlanta Hawks,102,Indiana Pacers,92,https://www.basketball-reference.com/boxscores...,,1,102.000000,40.000000,80.000000,0.500000,13.000000,22.000000,0.591000,9.000000,17.000000,0.529000,10.000000,32.000000,42.000000,26.000000,6.000000,8.000000,17.000000,24.000000,3 days,0.000000,96.000000,36.000000,77.500000,0.464000,8.000000,23.000000,0.350500,16.000000,22.000000,0.746000,12.500000,35.500000,48.000000,20.500000,5.000000,8.000000,17.500000,19.500000,1 days,0.500000,0,1,0,0
2,2014-11-05,8:30p,Atlanta Hawks,92,San Antonio Spurs,94,https://www.basketball-reference.com/boxscores...,,0,102.000000,37.500000,74.500000,0.503500,10.000000,21.000000,0.470500,17.000000,25.000000,0.643500,6.500000,33.000000,39.500000,26.000000,8.000000,7.000000,14.500000,22.000000,4 days,0.500000,95.000000,34.500000,74.000000,0.469500,11.000000,26.500000,0.410000,15.000000,19.500000,0.776000,7.000000,34.500000,41.500000,21.500000,4.000000,3.000000,16.000000,22.500000,5 days,0.500000,0,0,0,1
3,2014-11-07,7:00p,Atlanta Hawks,119,Charlotte Hornets,122,https://www.basketball-reference.com/boxscores...,,0,98.666667,37.666667,80.333333,0.473333,9.333333,22.333333,0.420333,14.000000,20.333333,0.671333,7.666667,31.000000,38.666667,26.000000,10.000000,6.333333,14.000000,23.000000,2 days,0.333333,91.400000,34.400000,81.000000,0.425600,5.400000,17.800000,0.307200,17.200000,23.600000,0.741200,9.400000,33.600000,43.000000,22.000000,7.000000,5.000000,13.200000,18.200000,2 days,0.400000,0,0,0,0
4,2014-11-08,7:30p,Atlanta Hawks,103,New York Knicks,96,https://www.basketball-reference.com/boxscores...,,1,103.750000,39.000000,83.500000,0.470500,10.250000,25.000000,0.413750,15.500000,21.750000,0.695750,7.500000,31.000000,38.500000,26.500000,9.500000,5.500000,15.250000,25.500000,1 days,0.250000,91.333333,35.500000,82.166667,0.435833,7.000000,16.500000,0.430000,13.333333,17.000000,0.783667,12.000000,27.500000,39.500000,22.500000,6.333333,3.500000,13.166667,24.000000,1 days,0.333333,1,1,0,0
5,2014-11-10,7:30p,Atlanta Hawks,91,New York Knicks,85,https://www.basketball-reference.com/boxscores...,,0,103.600000,37.800000,83.000000,0.457800,10.000000,24.400000,0.412800,18.000000,24.600000,0.712200,8.400000,30.600000,39.000000,24.800000,9.600000,5.400000,13.800000,23.800000,2 days,0.400000,92.000000,36.142857,82.428571,0.441571,7.142857,17.142857,0.423000,12.571429,16.142857,0.775571,12.142857,28.000000,40.142857,23.000000,5.714286,3.857143,13.428571,24.714286,2 days,0.285714,0,0,0,0
6,2014-11-12,7:30p,Atlanta Hawks,100,Utah Jazz,97,https://www.basketball-reference.com/boxscores...,,1,101.500000,36.000000,81.000000,0.444833,10.000000,24.833333,0.405667,19.500000,25.166667,0.754167,8.500000,30.333333,38.833333,24.000000,9.166667,5.000000,14.000000,22.500000,2 days,0.500000,97.625000,35.625000,77.500000,0.458750,7.750000,23.875000,0.318250,18.625000,23.625000,0.795125,10.750000,30.625000,41.375000,20.500000,5.875000,4.500000,14.250000,18.875000,2 days,0.375000,0,0,0,0
7,2014-11-14,7:30p,Atlanta Hawks,114,Miami Heat,103,https://www.basketball-reference.com/boxscores...,,1,101.285714,36.428571,80.285714,0.454571,9.857143,24.142857,0.412000,18.571429,24.142857,0.749571,9.142857,30.714286,39.857143,23.857143,9.000000,4.857143,14.571429,21.000000,2 days,0.571429,98.750000,36.000000,75.875000,0.473625,8.625000,22.750000,0.379125,18.125000,25.375000,0.707375,8.625000,28.625000,37.250000,23.125000,8.750000,3.125000,13.125000,21.500000,2 days,0.625000,0,0,0,0
8,2014-11-15,7:30p,Atlanta Hawks,94,Cleveland Cavaliers,127,https://www.basketball-reference.com/boxscores...,,0,102.875000,37.125000,79.625000,0.467750,10.000000,24.625000,0.409625,18.625000,24.000000,0.759125,8.375000,31.000000,39.375000,25.000000,9.125000,4.875000,14.375000,20.875000,1 days,0.625000,105.142857,36.714286,83.000000,0.444143,7.714286,21.285714,0.352714,24.000000,29.571429,0.811714,11.000000,29.428571,40.428571,19.571429,5.571429,3.714286,11.714286,17.428571,1 days,0.571429,1,1,0,0
9,2014-11-18,7:30p,Atlanta Hawks,109,Los Angeles Lakers,114,https://www.basketball-reference.com/boxscores...,,1,101.888889,37.444444,80.777778,0.465111,9.222222,24.333333,0.379222,17.777778,22.777778,0.768778,8.666667,30.444444,39.111111,25.111111,8.777778,5.222222,14.777778,19.888889,3 days,0.555556,101.600000,36.800000,85.000000,0.432700,4.900000,16.100000,0.297800,23.100000,30.300000,0.762100,13.200000,27.600000,40.800000,19.600000,7.600000,4.100000,12.500000,23.600000,2 days,0.100000,0,0,0,1


In [85]:
#Output to csv for saving purposes
export_csv = dedupeCleanDf.to_csv (r'C:\Users\jeromerufin\Desktop\Metis\nba_14_15.csv') #Don't forget to add '.csv' at the end of the path
