In [1]:
from bs4 import BeautifulSoup
import requests

from IPython.core.display import display, HTML

import pandas as pd

import re

import numpy as np

import datetime

In [2]:
# Retrieve the monthly schedule pages (October through March) of the
# NBA_2016 season listing on basketball-reference and parse each into a soup.
scheduleMonths = ['october', 'november', 'december', 'january', 'february', 'march']
urlList = [
    'https://www.basketball-reference.com/leagues/NBA_2016_games-{}.html'.format(month)
    for month in scheduleMonths
]

soupList = []
for url in urlList:
    # A timeout keeps a stalled connection from hanging the cell forever.
    response = requests.get(url, timeout=30)
    # Fail fast on an HTTP error status instead of parsing an error page and
    # hitting an opaque AttributeError when the table is looked up later.
    response.raise_for_status()
    page = response.text
    soup = BeautifulSoup(page, "lxml")
    soupList.append(soup)

# Individual names are kept because a later cell reads soup1 directly.
[soup1, soup2, soup3, soup4, soup5, soup6] = soupList

In [3]:
# Header cells (<th>) of the first table row on the first monthly page.
headerData = (
    soup1
    .find(class_='overthrow table_container')
    .find_all('tr')[0]
    .find_all('th')
)

In [4]:
# Each header cell exposes its column key through the `data-stat` attribute.
column_headers = [header_cell['data-stat'] for header_cell in headerData]
column_headers

['date_game',
 'game_start_time',
 'visitor_team_name',
 'visitor_pts',
 'home_team_name',
 'home_pts',
 'box_score_text',
 'overtimes',
 'attendance',
 'game_remarks']

In [5]:
def _cell_value(cell):
    """Return the value of one schedule-table cell as a plain ``str``.

    * an empty cell gives ``''``;
    * a cell whose first child is a 'Box Score' link gives the absolute
      box-score URL built from the link's ``href``;
    * a cell whose first child is any other link gives the link text;
    * otherwise the cell's first child is returned as text.

    Values are converted with ``str()`` so the resulting lists hold ordinary
    strings rather than BeautifulSoup node objects.
    """
    if cell.contents == []:
        return ''
    first_child = cell.contents[0]
    if first_child.name == 'a':
        link_text = first_child.contents[0]
        if link_text == 'Box Score':
            return 'https://www.basketball-reference.com' + first_child['href']
        return str(link_text)
    return str(first_child)


fullDataList = []

for soup in soupList:
    cellData = soup.find(class_='overthrow table_container').find_all('tr')
    # Index 0 is the header row of each monthly table, so start at 1.
    for table_row in cellData[1:]:
        row_cells = table_row.find_all(['td', 'th'])
        fullDataList.append([_cell_value(cell) for cell in row_cells])

# Rows that produced no cells at all are discarded.
cleanCellData = [row_values for row_values in fullDataList if row_values != []]

cleanCellData

[['Tue, Oct 27, 2015',
  '8:00p',
  'Detroit Pistons',
  '106',
  'Atlanta Hawks',
  '94',
  'https://www.basketball-reference.com/boxscores/201510270ATL.html',
  '',
  '19,187',
  ''],
 ['Tue, Oct 27, 2015',
  '8:00p',
  'Cleveland Cavaliers',
  '95',
  'Chicago Bulls',
  '97',
  'https://www.basketball-reference.com/boxscores/201510270CHI.html',
  '',
  '21,957',
  ''],
 ['Tue, Oct 27, 2015',
  '10:30p',
  'New Orleans Pelicans',
  '95',
  'Golden State Warriors',
  '111',
  'https://www.basketball-reference.com/boxscores/201510270GSW.html',
  '',
  '19,596',
  ''],
 ['Wed, Oct 28, 2015',
  '7:00p',
  'Washington Wizards',
  '88',
  'Orlando Magic',
  '87',
  'https://www.basketball-reference.com/boxscores/201510280ORL.html',
  '',
  '18,846',
  ''],
 ['Wed, Oct 28, 2015',
  '7:30p',
  'Indiana Pacers',
  '99',
  'Toronto Raptors',
  '106',
  'https://www.basketball-reference.com/boxscores/201510280TOR.html',
  '',
  '19,800',
  ''],
 ['Wed, Oct 28, 2015',
  '7:30p',
  'Charlotte Hor

In [6]:
# Combine the header keys and the scraped rows to create the initial DataFrame.
df = pd.DataFrame(data=cleanCellData, columns=column_headers)

In [7]:
def date_change(row):
    """Reformat a row's ``date_game`` value as an 'MM/DD/YYYY' string.

    ``row`` must support ``row['date_game']`` and hold text such as
    'Tue, Oct 27, 2015'. Commas are stripped before parsing with the
    pattern '%a %b %d %Y'.
    """
    without_commas = row['date_game'].replace(',', '')
    parsed = datetime.datetime.strptime(without_commas, "%a %b %d %Y")
    return parsed.strftime('%m/%d/%Y')

# date_change returns 'MM/DD/YYYY' text; passing the format explicitly keeps
# the parse unambiguous (month first) instead of relying on inference.
df['Date'] = pd.to_datetime(df.apply(date_change, axis=1), format='%m/%d/%Y')

In [8]:
# Full team name -> upper-case three-letter abbreviation.
nbaDict = {
    'Atlanta Hawks': 'ATL',
    'Brooklyn Nets': 'BRK',
    'Boston Celtics': 'BOS',
    'Charlotte Hornets': 'CHO',
    'Chicago Bulls': 'CHI',
    'Cleveland Cavaliers': 'CLE',
    'Dallas Mavericks': 'DAL',
    'Denver Nuggets': 'DEN',
    'Detroit Pistons': 'DET',
    'Golden State Warriors': 'GSW',
    'Houston Rockets': 'HOU',
    'Indiana Pacers': 'IND',
    'Los Angeles Clippers': 'LAC',
    'Los Angeles Lakers': 'LAL',
    'Memphis Grizzlies': 'MEM',
    'Miami Heat': 'MIA',
    'Milwaukee Bucks': 'MIL',
    'Minnesota Timberwolves': 'MIN',
    'New Orleans Pelicans': 'NOP',
    'New York Knicks': 'NYK',
    'Oklahoma City Thunder': 'OKC',
    'Orlando Magic': 'ORL',
    'Philadelphia 76ers': 'PHI',
    'Phoenix Suns': 'PHO',
    'Portland Trail Blazers': 'POR',
    'Sacramento Kings': 'SAC',
    'San Antonio Spurs': 'SAS',
    'Toronto Raptors': 'TOR',
    'Utah Jazz': 'UTA',
    'Washington Wizards': 'WAS',
}

In [9]:
# Column labels for the scraped box-score totals: the same stat names once
# with the 'vis' prefix and then once with the 'home' prefix.
statColumns = [
    '{} {}'.format(side, stat_name)
    for side in ('vis', 'home')
    for stat_name in (
        'FG', 'FGA', 'FG %', '3P', '3PA', '3P%', 'FT', 'FTA', 'FT%',
        'ORB', 'DRB', 'TRB', 'AST', 'STL', 'BLK', 'TOV', 'PF',
    )
]

In [10]:
def statRecorder(row):
    """Scrape both teams' total-row box-score stats for one game.

    Parameters
    ----------
    row : mapping (for example a DataFrame row)
        Must provide 'box_score_text' (the box-score URL) plus
        'visitor_team_name' and 'home_team_name', which must be keys of
        ``nbaDict``.

    Returns
    -------
    list of str
        The visitor's values followed by the home team's values, read from
        the last row of each team's basic box-score table with the first
        cell and the last two cells left out.

    Raises
    ------
    requests.HTTPError
        If the page request comes back with an error status.
    ValueError
        If a team's basic box-score table is not present in the page.
    """
    response = requests.get(row['box_score_text'], timeout=30)
    # Surface HTTP failures here rather than as a missing-table error below.
    response.raise_for_status()
    game_soup = BeautifulSoup(response.text, "lxml")

    statsList = []
    for team in (row['visitor_team_name'], row['home_team_name']):
        nbaTeam = nbaDict[team]
        # Try this function's original id layout first, then the lower-case
        # underscore layout that statRecorder2 targets.
        candidate_ids = (
            "box-{}-game-basic".format(nbaTeam),
            "box_{}_basic".format(nbaTeam.lower()),
        )
        stats_table = None
        for table_id in candidate_ids:
            stats_table = game_soup.find('table', id=table_id)
            if stats_table is not None:
                break
        if stats_table is None:
            raise ValueError(
                "No basic box-score table for {!r} at {}".format(team, row['box_score_text'])
            )
        totalStats = stats_table.find_all('tr')[-1].find_all('td')
        # str() keeps plain strings instead of BeautifulSoup node objects.
        statsList.extend(str(cell.contents[0]) for cell in totalStats[1:-2])
    return statsList

In [11]:
# Per-game stat lists are collected here batch by batch and turned into a
# DataFrame once every batch has been scraped.
stats_list = list()

In [12]:
# Scrape the very first game and place its stats at the front of stats_list.
zeroBatch = df.iloc[0:1].apply(statRecorder, axis=1)
for game_stats in zeroBatch:
    stats_list.insert(0, game_stats)

In [13]:
# Scrape rows 1-100 and append their stats in row order.
firstBatch = df.iloc[1:101].apply(statRecorder, axis=1)
stats_list.extend(firstBatch)

In [14]:
# Scrape rows 101-200 and append their stats in row order.
secondBatch = df.iloc[101:201].apply(statRecorder, axis=1)
stats_list.extend(secondBatch)

In [15]:
# Scrape rows 201-300 and append their stats in row order.
thirdBatch = df.iloc[201:301].apply(statRecorder, axis=1)
stats_list.extend(thirdBatch)

In [16]:
# Scrape rows 301-400 and append their stats in row order.
fourthBatch = df.iloc[301:401].apply(statRecorder, axis=1)
stats_list.extend(fourthBatch)

In [17]:
# Scrape rows 401-500 and append their stats in row order.
fifthBatch = df.iloc[401:501].apply(statRecorder, axis=1)
stats_list.extend(fifthBatch)

In [18]:
# Scrape rows 501-547 and append their stats in row order.
sixthBatch = df.iloc[501:548].apply(statRecorder, axis=1)
stats_list.extend(sixthBatch)

In [19]:
# Inspect the last three rows covered by the batches scraped so far.
df.iloc[545:548]

Unnamed: 0,date_game,game_start_time,visitor_team_name,visitor_pts,home_team_name,home_pts,box_score_text,overtimes,attendance,game_remarks,Date
545,"Fri, Jan 8, 2016",8:30p,New York Knicks,99,San Antonio Spurs,100,https://www.basketball-reference.com/boxscores...,,18420,,2016-01-08
546,"Fri, Jan 8, 2016",9:30p,Miami Heat,103,Phoenix Suns,95,https://www.basketball-reference.com/boxscores...,,16866,,2016-01-08
547,"Fri, Jan 8, 2016",10:00p,Golden State Warriors,128,Portland Trail Blazers,108,https://www.basketball-reference.com/boxscores...,,20035,,2016-01-08


In [20]:
# Full team name -> lower-case three-letter abbreviation.
nbaDictLower = {
    'Atlanta Hawks': 'atl',
    'Brooklyn Nets': 'brk',
    'Boston Celtics': 'bos',
    'Charlotte Hornets': 'cho',
    'Chicago Bulls': 'chi',
    'Cleveland Cavaliers': 'cle',
    'Dallas Mavericks': 'dal',
    'Denver Nuggets': 'den',
    'Detroit Pistons': 'det',
    'Golden State Warriors': 'gsw',
    'Houston Rockets': 'hou',
    'Indiana Pacers': 'ind',
    'Los Angeles Clippers': 'lac',
    'Los Angeles Lakers': 'lal',
    'Memphis Grizzlies': 'mem',
    'Miami Heat': 'mia',
    'Milwaukee Bucks': 'mil',
    'Minnesota Timberwolves': 'min',
    'New Orleans Pelicans': 'nop',
    'New York Knicks': 'nyk',
    'Oklahoma City Thunder': 'okc',
    'Orlando Magic': 'orl',
    'Philadelphia 76ers': 'phi',
    'Phoenix Suns': 'pho',
    'Portland Trail Blazers': 'por',
    'Sacramento Kings': 'sac',
    'San Antonio Spurs': 'sas',
    'Toronto Raptors': 'tor',
    'Utah Jazz': 'uta',
    'Washington Wizards': 'was',
}

In [21]:
def statRecorder2(row):
    """Scrape both teams' total-row stats using the 'box_<abbr>_basic' table id.

    Parameters
    ----------
    row : mapping (for example a DataFrame row)
        Must provide 'box_score_text' (the box-score URL) plus
        'visitor_team_name' and 'home_team_name', which must be keys of
        ``nbaDictLower``.

    Returns
    -------
    list of str
        The visitor's values followed by the home team's values, read from
        the last row of each team's table with the first cell and the last
        two cells left out.

    Raises
    ------
    requests.HTTPError
        If the page request comes back with an error status.
    ValueError
        If a team's table is not present in the page.
    """
    response = requests.get(row['box_score_text'], timeout=30)
    # Surface HTTP failures here rather than as a missing-table error below.
    response.raise_for_status()
    game_soup = BeautifulSoup(response.text, "lxml")

    statsList = []
    for team in (row['visitor_team_name'], row['home_team_name']):
        boxScoreLink = "box_{}_basic".format(nbaDictLower[team])
        stats_table = game_soup.find('table', id=boxScoreLink)
        if stats_table is None:
            raise ValueError(
                "No table with id {!r} at {}".format(boxScoreLink, row['box_score_text'])
            )
        totalStats = stats_table.find_all('tr')[-1].find_all('td')
        # str() keeps plain strings instead of BeautifulSoup node objects.
        statsList.extend(str(cell.contents[0]) for cell in totalStats[1:-2])
    return statsList

In [22]:
# Scrape rows 548-700 and append their stats in row order.
# NOTE(review): statRecorder2 is defined just above, yet this batch (and the
# following ones) still call statRecorder - confirm which one is intended.
seventhBatch = df.iloc[548:701].apply(statRecorder, axis=1)
stats_list.extend(seventhBatch)

In [23]:
# Scrape rows 701-800 and append their stats in row order.
eighthBatch = df.iloc[701:801].apply(statRecorder, axis=1)
stats_list.extend(eighthBatch)

In [24]:
# Scrape rows 801-900 and append their stats in row order.
ninthBatch = df.iloc[801:901].apply(statRecorder, axis=1)
stats_list.extend(ninthBatch)

In [25]:
# Scrape rows 901-1000 and append their stats in row order.
tenthBatch = df.iloc[901:1001].apply(statRecorder, axis=1)
stats_list.extend(tenthBatch)

In [26]:
# Scrape rows 1001-1151 and append their stats in row order.
eleventhBatch = df.iloc[1001:1152].apply(statRecorder, axis=1)
stats_list.extend(eleventhBatch)

In [27]:
# Build a DataFrame from the collected stat lists, labelled with statColumns.
statsdf = pd.DataFrame(data=stats_list, columns=statColumns)
statsdf.head(5)

Unnamed: 0,vis FG,vis FGA,vis FG %,vis 3P,vis 3PA,vis 3P%,vis FT,vis FTA,vis FT%,vis ORB,...,home FTA,home FT%,home ORB,home DRB,home TRB,home AST,home STL,home BLK,home TOV,home PF
0,37,96,0.385,12,29,0.414,20,26,0.769,23,...,15,0.8,7,33,40,22,9,4,15,25
1,38,94,0.404,9,29,0.31,10,17,0.588,11,...,23,0.696,7,40,47,13,6,10,13,22
2,35,83,0.422,6,18,0.333,19,27,0.704,8,...,22,0.909,21,35,56,29,8,7,20,29
3,33,84,0.393,7,28,0.25,15,25,0.6,15,...,12,0.667,17,39,56,20,9,6,14,22
4,32,86,0.372,9,23,0.391,26,31,0.839,8,...,39,0.692,9,41,50,19,8,2,20,24


In [28]:
# Put the scraped stats next to the schedule columns; pd.concat with axis=1
# pairs the rows of both frames by index label.
resultdf = pd.concat(objs=[df, statsdf], axis=1)
resultdf.head(5)

Unnamed: 0,date_game,game_start_time,visitor_team_name,visitor_pts,home_team_name,home_pts,box_score_text,overtimes,attendance,game_remarks,...,home FTA,home FT%,home ORB,home DRB,home TRB,home AST,home STL,home BLK,home TOV,home PF
0,"Tue, Oct 27, 2015",8:00p,Detroit Pistons,106,Atlanta Hawks,94,https://www.basketball-reference.com/boxscores...,,19187,,...,15,0.8,7,33,40,22,9,4,15,25
1,"Tue, Oct 27, 2015",8:00p,Cleveland Cavaliers,95,Chicago Bulls,97,https://www.basketball-reference.com/boxscores...,,21957,,...,23,0.696,7,40,47,13,6,10,13,22
2,"Tue, Oct 27, 2015",10:30p,New Orleans Pelicans,95,Golden State Warriors,111,https://www.basketball-reference.com/boxscores...,,19596,,...,22,0.909,21,35,56,29,8,7,20,29
3,"Wed, Oct 28, 2015",7:00p,Washington Wizards,88,Orlando Magic,87,https://www.basketball-reference.com/boxscores...,,18846,,...,12,0.667,17,39,56,20,9,6,14,22
4,"Wed, Oct 28, 2015",7:30p,Indiana Pacers,99,Toronto Raptors,106,https://www.basketball-reference.com/boxscores...,,19800,,...,39,0.692,9,41,50,19,8,2,20,24


In [29]:
# Save the combined frame to CSV. The target path must end in '.csv'.
stats_csv_path = r'C:\Users\jeromerufin\Desktop\Metis\stats_data.csv'
export_csv = resultdf.to_csv(stats_csv_path)

In [30]:
# Preview the first five rows of the combined frame.
resultdf.head(5)

Unnamed: 0,date_game,game_start_time,visitor_team_name,visitor_pts,home_team_name,home_pts,box_score_text,overtimes,attendance,game_remarks,...,home FTA,home FT%,home ORB,home DRB,home TRB,home AST,home STL,home BLK,home TOV,home PF
0,"Tue, Oct 27, 2015",8:00p,Detroit Pistons,106,Atlanta Hawks,94,https://www.basketball-reference.com/boxscores...,,19187,,...,15,0.8,7,33,40,22,9,4,15,25
1,"Tue, Oct 27, 2015",8:00p,Cleveland Cavaliers,95,Chicago Bulls,97,https://www.basketball-reference.com/boxscores...,,21957,,...,23,0.696,7,40,47,13,6,10,13,22
2,"Tue, Oct 27, 2015",10:30p,New Orleans Pelicans,95,Golden State Warriors,111,https://www.basketball-reference.com/boxscores...,,19596,,...,22,0.909,21,35,56,29,8,7,20,29
3,"Wed, Oct 28, 2015",7:00p,Washington Wizards,88,Orlando Magic,87,https://www.basketball-reference.com/boxscores...,,18846,,...,12,0.667,17,39,56,20,9,6,14,22
4,"Wed, Oct 28, 2015",7:30p,Indiana Pacers,99,Toronto Raptors,106,https://www.basketball-reference.com/boxscores...,,19800,,...,39,0.692,9,41,50,19,8,2,20,24


In [31]:
def _record_to_pct(record_text):
    """Turn a 'wins-losses' string into wins / (wins + losses); NaN for 0-0."""
    parts = record_text.split('-')
    wins, losses = int(parts[0]), int(parts[1])
    games_played = wins + losses
    if games_played == 0:
        # No games on record: report "undefined" instead of dividing by zero.
        return float('nan')
    return wins / games_played


def win_percent(row):
    """Scrape the two win-loss records shown on a game's box-score page.

    Parameters
    ----------
    row : mapping (for example a DataFrame row)
        Must provide 'box_score_text', the box-score URL.

    Returns
    -------
    list of float
        Two win fractions, taken from the first and the second <div> on the
        page whose text contains a hyphen, in page order.

    Raises
    ------
    requests.HTTPError
        If the page request comes back with an error status.
    ValueError
        If fewer than two hyphenated <div> elements are found.
    """
    response = requests.get(row['box_score_text'], timeout=30)
    response.raise_for_status()
    page_soup = BeautifulSoup(response.text, "lxml")

    # Search the page once and reuse the matches for both records.
    record_divs = page_soup.find_all('div', string=re.compile("-"))
    if len(record_divs) < 2:
        raise ValueError(
            "Expected two win-loss records at {}".format(row['box_score_text'])
        )

    win_list = [_record_to_pct(record_div.contents[0]) for record_div in record_divs[:2]]
    # Printed per game so progress is visible while a batch is being scraped.
    print(win_list)
    return win_list



In [32]:
# Consecutive row slices of resultdf, scraped one batch per cell below.
win_first_batch = resultdf.iloc[0:101]
win_second_batch = resultdf.iloc[101:201]
win_third_batch = resultdf.iloc[201:301]
win_fourth_batch = resultdf.iloc[301:401]
win_fifth_batch = resultdf.iloc[401:501]
win_sixth_batch = resultdf.iloc[501:601]
win_seventh_batch = resultdf.iloc[601:701]
win_eighth_batch = resultdf.iloc[701:801]
win_ninth_batch = resultdf.iloc[801:901]
win_tenth_batch = resultdf.iloc[901:1001]
win_eleventh_batch = resultdf.iloc[1001:1152]

In [33]:
# Collects one two-element win-percentage list per game, batch after batch.
win_final_list = []
win_final_list.extend(win_first_batch.apply(win_percent, axis=1))

[1.0, 0.0]
[0.0, 1.0]
[0.0, 1.0]
[1.0, 0.0]
[0.0, 1.0]
[0.0, 1.0]
[1.0, 0.0]
[0.0, 1.0]
[0.0, 1.0]
[1.0, 0.0]
[0.0, 1.0]
[1.0, 0.0]
[0.5, 0.0]
[1.0, 0.0]
[0.0, 1.0]
[1.0, 0.0]
[1.0, 0.0]
[0.5, 0.0]
[0.5, 0.5]
[0.5, 1.0]
[0.5, 0.0]
[1.0, 0.0]
[0.5, 0.6666666666666666]
[0.6666666666666666, 1.0]
[1.0, 0.5]
[1.0, 0.0]
[0.0, 0.6666666666666666]
[0.0, 0.5]
[1.0, 0.5]
[1.0, 0.0]
[0.0, 0.5]
[0.5, 0.5]
[0.6666666666666666, 0.0]
[0.6666666666666666, 0.6666666666666666]
[1.0, 0.0]
[0.0, 0.6666666666666666]
[0.6666666666666666, 0.3333333333333333]
[0.3333333333333333, 1.0]
[0.75, 0.0]
[0.6666666666666666, 0.3333333333333333]
[0.0, 0.6666666666666666]
[0.0, 1.0]
[0.0, 0.75]
[0.3333333333333333, 1.0]
[0.6666666666666666, 0.0]
[0.75, 0.0]
[0.25, 0.0]
[0.75, 0.5]
[0.75, 0.25]
[0.5, 0.6666666666666666]
[0.5, 1.0]
[0.5, 1.0]
[0.6, 0.25]
[0.25, 0.75]
[0.8, 0.5]
[0.25, 0.0]
[1.0, 0.5]
[0.6, 0.25]
[0.5, 0.0]
[0.25, 0.4]
[0.6, 0.75]
[0.0, 0.8333333333333334]
[0.4, 0.8]
[0.2, 0.4]
[0.0, 0.4]
[1.0, 0.6]
[0.2,

In [34]:
# Scrape the second batch and append its results in row order.
win_final_list.extend(win_second_batch.apply(win_percent, axis=1))

[0.5, 0.42857142857142855]
[0.7142857142857143, 0.125]
[0.7142857142857143, 1.0]
[0.375, 0.7142857142857143]
[0.5714285714285714, 0.875]
[0.625, 0.42857142857142855]
[0.14285714285714285, 0.625]
[0.5, 0.625]
[0.5, 0.5]
[0.42857142857142855, 0.5714285714285714]
[0.42857142857142855, 0.14285714285714285]
[0.4444444444444444, 0.5]
[0.125, 0.4444444444444444]
[0.6666666666666666, 0.0]
[0.5555555555555556, 0.42857142857142855]
[0.125, 0.8]
[0.625, 0.5]
[0.125, 0.5]
[1.0, 0.3333333333333333]
[0.4444444444444444, 0.5]
[0.625, 0.2222222222222222]
[0.75, 0.4444444444444444]
[0.5, 0.6666666666666666]
[1.0, 0.5]
[0.5555555555555556, 0.5]
[0.4444444444444444, 0.6]
[0.4444444444444444, 0.5]
[0.7272727272727273, 0.5]
[0.8888888888888888, 0.4]
[0.1111111111111111, 0.7]
[0.4444444444444444, 0.6666666666666666]
[0.4, 0.4]
[0.0, 0.6666666666666666]
[0.1111111111111111, 0.5555555555555556]
[0.4444444444444444, 0.5555555555555556]
[0.1111111111111111, 0.3]
[0.5555555555555556, 0.6]
[0.45454545454545453, 0

In [35]:
# Scrape the third batch and append its results in row order.
win_final_list.extend(win_third_batch.apply(win_percent, axis=1))

[0.3333333333333333, 0.5714285714285714]
[0.42857142857142855, 0.7857142857142857]
[0.5333333333333333, 0.6923076923076923]
[0.5, 0.42857142857142855]
[0.0, 0.42857142857142855]
[0.5, 0.7857142857142857]
[0.6, 0.46153846153846156]
[0.6428571428571429, 0.5454545454545454]
[0.5, 0.625]
[0.6, 0.5333333333333333]
[0.5, 0.4]
[0.6923076923076923, 0.375]
[0.14285714285714285, 1.0]
[0.5, 0.6]
[0.5, 0.4666666666666667]
[0.0, 0.5333333333333333]
[0.6428571428571429, 0.5333333333333333]
[0.7333333333333333, 0.625]
[0.5625, 0.3333333333333333]
[0.375, 0.4]
[0.5882352941176471, 0.4666666666666667]
[0.2, 0.625]
[0.5625, 0.8]
[0.26666666666666666, 0.4666666666666667]
[0.5, 0.4666666666666667]
[0.75, 0.5625]
[0.375, 0.5]
[0.46153846153846156, 0.5625]
[0.6666666666666666, 0.47058823529411764]
[0.0, 0.375]
[0.6428571428571429, 0.6666666666666666]
[0.6111111111111112, 0.5294117647058824]
[0.5, 0.6470588235294118]
[0.8125, 0.375]
[1.0, 0.4375]
[0.5, 0.35294117647058826]
[0.25, 0.5]
[0.6470588235294118, 0.

In [36]:
# Scrape the fourth batch and append its results in row order.
win_final_list.extend(win_fourth_batch.apply(win_percent, axis=1))

[0.5714285714285714, 0.4444444444444444]
[0.3181818181818182, 0.6]
[0.5454545454545454, 0.6]
[0.8181818181818182, 0.045454545454545456]
[0.47368421052631576, 0.631578947368421]
[0.5909090909090909, 0.45454545454545453]
[0.14285714285714285, 0.5909090909090909]
[0.4090909090909091, 0.6111111111111112]
[0.4090909090909091, 0.4090909090909091]
[0.5714285714285714, 0.4]
[0.5714285714285714, 0.23809523809523808]
[0.391304347826087, 0.6666666666666666]
[1.0, 0.6]
[0.45454545454545453, 0.2857142857142857]
[0.6190476190476191, 0.5454545454545454]
[0.5714285714285714, 0.36363636363636365]
[0.47368421052631576, 0.34782608695652173]
[0.5789473684210527, 0.5909090909090909]
[0.6, 0.6190476190476191]
[0.4782608695652174, 0.45]
[0.5652173913043478, 0.5217391304347826]
[0.782608695652174, 0.6086956521739131]
[0.5909090909090909, 0.391304347826087]
[0.13636363636363635, 0.42857142857142855]
[0.5454545454545454, 0.43478260869565216]
[0.43478260869565216, 0.5]
[0.6086956521739131, 0.5652173913043478]
[0

In [37]:
# Scrape the fifth batch and append its results in row order.
win_final_list.extend(win_fifth_batch.apply(win_percent, axis=1))

[0.6153846153846154, 0.5172413793103449]
[0.4074074074074074, 0.25925925925925924]
[0.3793103448275862, 0.6153846153846154]
[0.034482758620689655, 0.72]
[0.3793103448275862, 0.41379310344827586]
[0.5862068965517241, 0.5555555555555556]
[0.4074074074074074, 0.5862068965517241]
[0.2962962962962963, 0.4074074074074074]
[0.39285714285714285, 0.46153846153846156]
[0.39285714285714285, 0.5357142857142857]
[0.5714285714285714, 0.4827586206896552]
[0.36666666666666664, 0.6]
[0.2857142857142857, 0.5769230769230769]
[0.5555555555555556, 0.5172413793103449]
[0.5925925925925926, 0.8275862068965517]
[0.4, 0.46153846153846156]
[0.6785714285714286, 0.5517241379310345]
[0.5333333333333333, 0.03333333333333333]
[0.5862068965517241, 0.5925925925925926]
[0.5357142857142857, 0.6]
[0.17857142857142858, 0.39285714285714285]
[0.5517241379310345, 0.5357142857142857]
[0.4666666666666667, 0.7307692307692307]
[0.41379310344827586, 0.5714285714285714]
[0.5, 0.5862068965517241]
[0.5161290322580645, 0.4814814814814

In [38]:
# Scrape the sixth batch and append its results in row order.
win_final_list.extend(win_sixth_batch.apply(win_percent, axis=1))

[0.45714285714285713, 0.8285714285714286]
[0.5142857142857142, 0.46875]
[0.35294117647058826, 0.9393939393939394]
[0.08333333333333333, 0.6285714285714286]
[0.6, 0.45714285714285713]
[0.625, 0.6]
[0.6060606060606061, 0.46875]
[0.4166666666666667, 0.34285714285714286]
[0.32432432432432434, 0.22857142857142856]
[0.5833333333333334, 0.71875]
[0.34285714285714286, 0.10810810810810811]
[0.5588235294117647, 0.29411764705882354]
[0.5428571428571428, 0.5428571428571428]
[0.5588235294117647, 0.6176470588235294]
[0.8333333333333334, 0.3888888888888889]
[0.4117647058823529, 0.6857142857142857]
[0.4722222222222222, 0.45454545454545453]
[0.5277777777777778, 0.40540540540540543]
[0.5, 0.9411764705882353]
[0.4722222222222222, 0.5833333333333334]
[0.3783783783783784, 0.6363636363636364]
[0.4, 0.5714285714285714]
[0.9428571428571428, 0.2222222222222222]
[0.4864864864864865, 0.6]
[0.5714285714285714, 0.5277777777777778]
[0.7272727272727273, 0.45454545454545453]
[0.5555555555555556, 0.5428571428571428]
[

In [39]:
# Scrape the seventh batch and append its results in row order.
win_final_list.extend(win_seventh_batch.apply(win_percent, axis=1))

[0.4186046511627907, 0.45]
[0.2682926829268293, 0.5853658536585366]
[0.9024390243902439, 0.55]
[0.4186046511627907, 0.11904761904761904]
[0.5365853658536586, 0.48717948717948717]
[0.47619047619047616, 0.5476190476190477]
[0.21428571428571427, 0.45]
[0.425, 0.65]
[0.30952380952380953, 0.30952380952380953]
[0.5609756097560976, 0.7142857142857143]
[0.5476190476190477, 0.8571428571428571]
[0.5365853658536586, 0.3902439024390244]
[0.5238095238095238, 0.20930232558139536]
[0.11627906976744186, 0.4883720930232558]
[0.43902439024390244, 0.4634146341463415]
[0.4318181818181818, 0.475]
[0.325, 0.5581395348837209]
[0.6, 0.5365853658536586]
[0.5, 0.5952380952380952]
[0.2619047619047619, 0.6341463414634146]
[0.9047619047619048, 0.717948717948718]
[0.5238095238095238, 0.5581395348837209]
[0.5116279069767442, 0.6585365853658537]
[0.4318181818181818, 0.5476190476190477]
[0.3023255813953488, 0.34146341463414637]
[0.7209302325581395, 0.38095238095238093]
[0.5476190476190477, 0.3023255813953488]
[0.13636

In [40]:
# Scrape the eighth batch and append its results in row order.
win_final_list.extend(win_eighth_batch.apply(win_percent, axis=1))

[0.2553191489361702, 0.5510204081632653]
[0.2916666666666667, 0.45652173913043476]
[0.46808510638297873, 0.4583333333333333]
[0.1836734693877551, 0.6595744680851063]
[0.9148936170212766, 0.14583333333333334]
[0.5208333333333334, 0.6808510638297872]
[0.375, 0.5319148936170213]
[0.25, 0.391304347826087]
[0.4666666666666667, 0.5]
[0.425531914893617, 0.5833333333333334]
[0.8297872340425532, 0.7391304347826086]
[0.5652173913043478, 0.6666666666666666]
[0.5510204081632653, 0.5625]
[0.5510204081632653, 0.45652173913043476]
[0.2857142857142857, 0.56]
[0.9166666666666666, 0.46]
[0.2857142857142857, 0.46938775510204084]
[0.4791666666666667, 0.18]
[0.7446808510638298, 0.5208333333333334]
[0.5306122448979592, 0.24489795918367346]
[0.5490196078431373, 0.56]
[0.5918367346938775, 0.3829787234042553]
[0.45652173913043476, 0.74]
[0.44680851063829785, 0.8333333333333334]
[0.6666666666666666, 0.3877551020408163]
[0.5531914893617021, 0.46808510638297873]
[0.4, 0.4375]
[0.56, 0.45098039215686275]
[0.551020

In [41]:
# Scrape the ninth batch and append its results in row order.
win_final_list.extend(win_ninth_batch.apply(win_percent, axis=1))

[0.49056603773584906, 0.46153846153846156]
[0.5094339622641509, 0.7358490566037735]
[0.8333333333333334, 0.6666666666666666]
[0.5178571428571429, 0.4528301886792453]
[0.4909090909090909, 0.4716981132075472]
[0.4107142857142857, 0.2727272727272727]
[0.5555555555555556, 0.5535714285714286]
[0.660377358490566, 0.5185185185185185]
[0.3090909090909091, 0.5925925925925926]
[0.5185185185185185, 0.4]
[0.14814814814814814, 0.3888888888888889]
[0.5370370370370371, 0.7272727272727273]
[0.5, 0.2545454545454545]
[0.9056603773584906, 0.509090909090909]
[0.4, 0.42592592592592593]
[0.8363636363636363, 0.19642857142857142]
[0.5714285714285714, 0.5]
[0.4107142857142857, 0.543859649122807]
[0.46296296296296297, 0.5636363636363636]
[0.42105263157894735, 0.30357142857142855]
[0.9074074074074074, 0.6545454545454545]
[0.4, 0.48214285714285715]
[0.7407407407407407, 0.7142857142857143]
[0.5789473684210527, 0.39285714285714285]
[0.8392857142857143, 0.25]
[0.5272727272727272, 0.26785714285714285]
[0.545454545454

In [42]:
# Scrape the tenth batch and append its results in row order.
win_final_list.extend(win_tenth_batch.apply(win_percent, axis=1))

[0.4067796610169492, 0.6]
[0.5245901639344263, 0.4098360655737705]
[0.5, 0.3114754098360656]
[0.5081967213114754, 0.85]
[0.1935483870967742, 0.39344262295081966]
[0.6885245901639344, 0.6666666666666666]
[0.2459016393442623, 0.5737704918032787]
[0.8524590163934426, 0.38333333333333336]
[0.4166666666666667, 0.532258064516129]
[0.6774193548387096, 0.9166666666666666]
[0.5161290322580645, 0.5409836065573771]
[0.25806451612903225, 0.4426229508196721]
[0.5806451612903226, 0.12903225806451613]
[0.3968253968253968, 0.6031746031746031]
[0.5238095238095238, 0.6833333333333333]
[0.4918032786885246, 0.7166666666666667]
[0.45901639344262296, 0.6065573770491803]
[0.3064516129032258, 0.41935483870967744]
[0.2903225806451613, 0.3870967741935484]
[0.5483870967741935, 0.19047619047619047]
[0.59375, 0.7213114754098361]
[0.46774193548387094, 0.3770491803278688]
[0.5, 0.40625]
[0.5238095238095238, 0.4838709677419355]
[0.2857142857142857, 0.31746031746031744]
[0.4838709677419355, 0.5081967213114754]
[0.4098

In [43]:
# Scrape the eleventh batch and append its results in row order.
win_final_list.extend(win_eleventh_batch.apply(win_percent, axis=1))

[0.3939393939393939, 0.20588235294117646]
[0.6764705882352942, 0.5735294117647058]
[0.43283582089552236, 0.5671641791044776]
[0.5, 0.7164179104477612]
[0.5, 0.47761194029850745]
[0.5735294117647058, 0.5]
[0.3235294117647059, 0.5735294117647058]
[0.6417910447761194, 0.5]
[0.373134328358209, 0.3880597014925373]
[0.4057971014492754, 0.9104477611940298]
[0.6865671641791045, 0.5294117647058824]
[0.4852941176470588, 0.1323529411764706]
[0.5735294117647058, 0.5735294117647058]
[0.4057971014492754, 0.5797101449275363]
[0.27941176470588236, 0.5074626865671642]
[0.5652173913043478, 0.43478260869565216]
[0.5072463768115942, 0.8529411764705882]
[0.2647058823529412, 0.4852941176470588]
[0.7205882352941176, 0.4264705882352941]
[0.6811594202898551, 0.13043478260869565]
[0.38235294117647056, 0.5072463768115942]
[0.5652173913043478, 0.6911764705882353]
[0.3188405797101449, 0.5072463768115942]
[0.5142857142857142, 0.36764705882352944]
[0.9117647058823529, 0.4927536231884058]
[0.2753623188405797, 0.20289

In [44]:
# One row per game: the two scraped win percentages as named columns.
win_columns = ['Team_Win_Pct', 'Opp_Win_Pct']
win_pct_df = pd.DataFrame(data=win_final_list, columns=win_columns)

In [45]:
# Attach the win-percentage columns. Copies of those columns left on resultdf
# by an earlier run of this cell are dropped first, so re-running the cell
# does not duplicate them and shift the positional column selection that the
# visitor/home split relies on.
resultdf = pd.concat(
    [resultdf.drop(list(win_pct_df.columns), axis=1, errors='ignore'), win_pct_df],
    axis=1,
)

In [46]:
# Show only a preview; displaying the full frame floods the notebook output.
resultdf.head(10)

Unnamed: 0,date_game,game_start_time,visitor_team_name,visitor_pts,home_team_name,home_pts,box_score_text,overtimes,attendance,game_remarks,...,home ORB,home DRB,home TRB,home AST,home STL,home BLK,home TOV,home PF,Team_Win_Pct,Opp_Win_Pct
0,"Tue, Oct 27, 2015",8:00p,Detroit Pistons,106,Atlanta Hawks,94,https://www.basketball-reference.com/boxscores...,,19187,,...,7,33,40,22,9,4,15,25,1.000000,0.000000
1,"Tue, Oct 27, 2015",8:00p,Cleveland Cavaliers,95,Chicago Bulls,97,https://www.basketball-reference.com/boxscores...,,21957,,...,7,40,47,13,6,10,13,22,0.000000,1.000000
2,"Tue, Oct 27, 2015",10:30p,New Orleans Pelicans,95,Golden State Warriors,111,https://www.basketball-reference.com/boxscores...,,19596,,...,21,35,56,29,8,7,20,29,0.000000,1.000000
3,"Wed, Oct 28, 2015",7:00p,Washington Wizards,88,Orlando Magic,87,https://www.basketball-reference.com/boxscores...,,18846,,...,17,39,56,20,9,6,14,22,1.000000,0.000000
4,"Wed, Oct 28, 2015",7:30p,Indiana Pacers,99,Toronto Raptors,106,https://www.basketball-reference.com/boxscores...,,19800,,...,9,41,50,19,8,2,20,24,0.000000,1.000000
5,"Wed, Oct 28, 2015",7:30p,Charlotte Hornets,94,Miami Heat,104,https://www.basketball-reference.com/boxscores...,,19724,,...,2,39,41,23,5,7,13,25,0.000000,1.000000
6,"Wed, Oct 28, 2015",7:30p,Chicago Bulls,115,Brooklyn Nets,100,https://www.basketball-reference.com/boxscores...,,17732,,...,16,29,45,19,11,5,13,18,1.000000,0.000000
7,"Wed, Oct 28, 2015",7:30p,Philadelphia 76ers,95,Boston Celtics,112,https://www.basketball-reference.com/boxscores...,,18624,,...,10,31,41,31,10,7,17,23,0.000000,1.000000
8,"Wed, Oct 28, 2015",7:30p,Utah Jazz,87,Detroit Pistons,92,https://www.basketball-reference.com/boxscores...,,18434,,...,8,35,43,16,2,4,10,20,0.000000,1.000000
9,"Wed, Oct 28, 2015",8:00p,Denver Nuggets,105,Houston Rockets,85,https://www.basketball-reference.com/boxscores...,,18240,,...,15,29,44,17,13,8,16,19,1.000000,0.000000


In [47]:
# Column positions in resultdf - assumes the layout built by the cells above
# (10 scraped schedule columns, 'Date', 17 'vis' stats, 17 'home' stats, then
# the two win-percentage columns); TODO confirm if any column is added earlier:
#   0-10  schedule columns + Date    11-27  visitor stats
#   28-44 home stats                 45/46  Team_Win_Pct / Opp_Win_Pct

# visitor stats: schedule columns, visitor stats, first win percentage
visitor_positions = list(range(0, 28)) + [45]
visitor_df = resultdf.iloc[:, visitor_positions]

# home stats: same layout, with the home name/points columns (4-5) swapped
# ahead of the visitor ones (2-3), home stats, second win percentage
home_positions = (
    list(range(0, 2))
    + list(range(4, 6))
    + list(range(2, 4))
    + list(range(6, 11))
    + list(range(28, 45))
    + [46]
)
home_df = resultdf.iloc[:, home_positions]


In [48]:
# Indicator frames marking whether a row is the home side: 0 for every
# visitor row, 1 for every home row.
list_of_zeros = [0] * len(visitor_df)
list_of_ones = [1] * len(home_df)

away_bool_df = pd.DataFrame({'Home Team': list_of_zeros})
home_bool_df = pd.DataFrame({'Home Team': list_of_ones})

In [49]:
# Append the binary home/away indicator as a new last column of each frame.
visitor_df = pd.concat(objs=[visitor_df, away_bool_df], axis=1)
home_df = pd.concat(objs=[home_df, home_bool_df], axis=1)

In [50]:
# Both frames get the same team-centric labels so they can be stacked:
# each row describes one team ('team_name') against its 'opposing_team'.
team_view_columns = [
    'date_game', 'game_start_time', 'team_name', 'team_pts',
    'opposing_team', 'opposing_team_pts', 'box_score_text', 'overtimes',
    'attendance', 'game_remarks', 'Date', 'FG', 'FGA', 'FG %',
    '3P', '3PA', '3P%', 'FT', 'FTA', 'FT%',
    'ORB', 'DRB', 'TRB', 'AST', 'STL', 'BLK',
    'TOV', 'PF', 'Team_win_pct', 'Home Team',
]

visitor_df.columns = list(team_view_columns)
home_df.columns = list(team_view_columns)

In [51]:
# Stack the visitor and home rows into one frame with a fresh 0..n-1 index,
# then overwrite 'date_game' with the 'Date' column and remove 'Date'.
frames = [visitor_df, home_df]
agg_data = pd.concat(frames, ignore_index=True)
agg_data['date_game'] = agg_data.pop('Date')

In [52]:
# Order rows by team and then by date; reset_index keeps the previous row
# labels in a new 'index' column.
ordered_agg_data = (
    agg_data
    .sort_values(by=['team_name', 'date_game'])
    .reset_index(drop=False)
)

In [53]:
# Preview the first five rows of the team/date-ordered frame.
ordered_agg_data.head(5)

Unnamed: 0,index,date_game,game_start_time,team_name,team_pts,opposing_team,opposing_team_pts,box_score_text,overtimes,attendance,...,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,Team_win_pct,Home Team
0,1127,2015-10-27,8:00p,Atlanta Hawks,94,Detroit Pistons,106,https://www.basketball-reference.com/boxscores...,,19187,...,7,33,40,22,9,4,15,25,0.0,1
1,18,2015-10-29,8:00p,Atlanta Hawks,112,New York Knicks,101,https://www.basketball-reference.com/boxscores...,,19812,...,7,32,39,26,11,4,15,18,0.5,0
2,1153,2015-10-30,8:00p,Atlanta Hawks,97,Charlotte Hornets,94,https://www.basketball-reference.com/boxscores...,,17024,...,8,37,45,23,9,4,15,17,0.666667,1
3,38,2015-11-01,2:00p,Atlanta Hawks,94,Charlotte Hornets,92,https://www.basketball-reference.com/boxscores...,,18691,...,9,39,48,22,9,6,11,16,0.75,0
4,54,2015-11-03,7:30p,Atlanta Hawks,98,Miami Heat,92,https://www.basketball-reference.com/boxscores...,,19600,...,16,35,51,25,10,1,15,14,0.8,0


In [54]:
rolling_stat_data = agg_data.sort_values(by=['team_name', 'date_game'])

rolling_stat_cols = [
    'team_pts', 'opposing_team_pts', 'FG', 'FGA', 'FG %',
    '3P', '3PA', '3P%', 'FT', 'FTA', 'FT%',
    'ORB', 'DRB', 'TRB', 'AST', 'STL', 'BLK',
    'TOV', 'PF',
]

# The scraped values are text (e.g. '106'), and a rolling mean needs numeric
# columns. Columns that are already numeric pass through unchanged; values
# that cannot be parsed become NaN.
rolling_stat_data[rolling_stat_cols] = rolling_stat_data[rolling_stat_cols].apply(
    pd.to_numeric, errors='coerce'
)

# Per-team running mean over up to the 30 most recent rows (current row
# included); min_periods=1 yields a value from a team's first game onward.
testdata_grouped_rolling = (
    rolling_stat_data
    .groupby('team_name')[rolling_stat_cols]
    .rolling(window=30, min_periods=1)
    .mean()
    .reset_index()
)

In [55]:
# Remove the two columns that reset_index() added to the rolling result.
testdata_grouped_rolling = testdata_grouped_rolling.drop(['level_1', 'team_name'], axis=1)

In [56]:
# Label every rolling-mean column with an 'Avg1 ' prefix.
testdata_grouped_rolling.columns = [
    'Avg1 ' + stat_label
    for stat_label in (
        'Pts', 'Opp Pts', 'FG', 'FGA', 'FG %',
        '3P', '3PA', '3P%', 'FT', 'FTA', 'FT%',
        'ORB', 'DRB', 'TRB', 'AST', 'STL', 'BLK',
        'TOV', 'PF',
    )
]

In [57]:
# Place the rolling means beside the ordered game rows; pd.concat with axis=1
# pairs the rows of both frames by index label.
full_stats_data = pd.concat(objs=[ordered_agg_data, testdata_grouped_rolling], axis=1)
full_stats_data.head(5)

Unnamed: 0,index,date_game,game_start_time,team_name,team_pts,opposing_team,opposing_team_pts,box_score_text,overtimes,attendance,...,Avg1 FTA,Avg1 FT%,Avg1 ORB,Avg1 DRB,Avg1 TRB,Avg1 AST,Avg1 STL,Avg1 BLK,Avg1 TOV,Avg1 PF
0,1127,2015-10-27,8:00p,Atlanta Hawks,94,Detroit Pistons,106,https://www.basketball-reference.com/boxscores...,,19187,...,15.0,0.8,7.0,33.0,40.0,22.0,9.0,4.0,15.0,25.0
1,18,2015-10-29,8:00p,Atlanta Hawks,112,New York Knicks,101,https://www.basketball-reference.com/boxscores...,,19812,...,20.5,0.746,7.0,32.5,39.5,24.0,10.0,4.0,15.0,21.5
2,1153,2015-10-30,8:00p,Atlanta Hawks,97,Charlotte Hornets,94,https://www.basketball-reference.com/boxscores...,,17024,...,21.0,0.755,7.333333,34.0,41.333333,23.666667,9.666667,4.0,15.0,20.0
3,38,2015-11-01,2:00p,Atlanta Hawks,94,Charlotte Hornets,92,https://www.basketball-reference.com/boxscores...,,18691,...,19.25,0.7985,7.75,35.25,43.0,23.25,9.5,4.5,14.0,19.0
4,54,2015-11-03,7:30p,Atlanta Hawks,98,Miami Heat,92,https://www.basketball-reference.com/boxscores...,,19600,...,19.8,0.7934,9.4,35.2,44.6,23.6,9.6,3.8,14.2,18.0


In [58]:
# Shift the rolling averages down by one game within each team, so every
# row carries the average of the games played *before* it rather than an
# average that already includes the game itself. A team's first game gets NaN.
avg1_cols = ['Avg1 Pts', 'Avg1 Opp Pts', 'Avg1 FG', 'Avg1 FGA', 'Avg1 FG %',
             'Avg1 3P', 'Avg1 3PA', 'Avg1 3P%', 'Avg1 FT', 'Avg1 FTA', 'Avg1 FT%',
             'Avg1 ORB', 'Avg1 DRB', 'Avg1 TRB', 'Avg1 AST', 'Avg1 STL', 'Avg1 BLK',
             'Avg1 TOV', 'Avg1 PF']

# Final names: the same stats without the temporary "Avg1" marker
# ('Avg1 Pts' -> 'Avg Pts', ...).
avg_cols = [name.replace('Avg1 ', 'Avg ', 1) for name in avg1_cols]

# Select the columns with a list: a bare tuple of labels inside [...] is
# rejected by pandas >= 2.0. GroupBy.shift is used instead of
# apply(lambda grp: grp.shift(1)) because it always returns rows labelled
# with the original index, which the column assignment below depends on.
shifted_avgs = full_stats_data.groupby('team_name')[avg1_cols].shift(1)
shifted_avgs.columns = avg_cols
full_stats_data[avg_cols] = shifted_avgs

In [59]:
# Remove the temporary, un-shifted "Avg1" columns; only the shifted
# "Avg" versions are used from here on.
unshifted_avg_cols = [
    'Avg1 Pts', 'Avg1 Opp Pts', 'Avg1 FG', 'Avg1 FGA', 'Avg1 FG %',
    'Avg1 3P', 'Avg1 3PA', 'Avg1 3P%', 'Avg1 FT', 'Avg1 FTA', 'Avg1 FT%',
    'Avg1 ORB', 'Avg1 DRB', 'Avg1 TRB', 'Avg1 AST', 'Avg1 STL', 'Avg1 BLK',
    'Avg1 TOV', 'Avg1 PF',
]
full_stats_data = full_stats_data.drop(unshifted_avg_cols, axis='columns')

In [60]:
# Date of the same team's previous game (NaT for a team's first game).
# GroupBy.shift replaces apply(lambda grp: grp.shift(1)): it performs the
# same per-team shift but always returns a Series labelled with the
# original row index, so the assignment lines up on every pandas version
# (transform-like apply gained group keys in its index in pandas 2.0).
full_stats_data['Previous Game Data'] = full_stats_data.groupby('team_name')['date_game'].shift(1)

In [61]:
# Elapsed time since the team's previous game; this is the input for the
# back-to-back flags computed later. NaT where there is no previous game.
gap_since_previous_game = full_stats_data['date_game'].sub(full_stats_data['Previous Game Data'])
full_stats_data['Time between games'] = gap_since_previous_game

In [62]:
# Win percentage going *into* each game: the team's 'Team_win_pct' value
# from its previous game (NaN for a team's first game).
# GroupBy.shift replaces apply(lambda grp: grp.shift(1)): same per-team
# shift, but the result is always labelled with the original row index,
# which keeps the assignment aligned on pandas >= 2.0 as well.
full_stats_data['Win_pct'] = full_stats_data.groupby('team_name')['Team_win_pct'].shift(1)

In [63]:
# Replicate the frame so it can be joined against itself, giving each row
# both the team's averages and the opposing team's averages.
# .copy() makes this a real, independent replica; a plain assignment would
# only create a second name for the same object, so any later in-place
# change to one frame would silently change the other.
rep_full_stats_data = full_stats_data.copy()

rep_full_stats_data.head()

Unnamed: 0,index,date_game,game_start_time,team_name,team_pts,opposing_team,opposing_team_pts,box_score_text,overtimes,attendance,...,Avg DRB,Avg TRB,Avg AST,Avg STL,Avg BLK,Avg TOV,Avg PF,Previous Game Data,Time between games,Win_pct
0,1127,2015-10-27,8:00p,Atlanta Hawks,94,Detroit Pistons,106,https://www.basketball-reference.com/boxscores...,,19187,...,,,,,,,,NaT,NaT,
1,18,2015-10-29,8:00p,Atlanta Hawks,112,New York Knicks,101,https://www.basketball-reference.com/boxscores...,,19812,...,33.0,40.0,22.0,9.0,4.0,15.0,25.0,2015-10-27,2 days,0.0
2,1153,2015-10-30,8:00p,Atlanta Hawks,97,Charlotte Hornets,94,https://www.basketball-reference.com/boxscores...,,17024,...,32.5,39.5,24.0,10.0,4.0,15.0,21.5,2015-10-29,1 days,0.5
3,38,2015-11-01,2:00p,Atlanta Hawks,94,Charlotte Hornets,92,https://www.basketball-reference.com/boxscores...,,18691,...,34.0,41.333333,23.666667,9.666667,4.0,15.0,20.0,2015-10-30,2 days,0.666667
4,54,2015-11-03,7:30p,Atlanta Hawks,98,Miami Heat,92,https://www.basketball-reference.com/boxscores...,,19600,...,35.25,43.0,23.25,9.5,4.5,14.0,19.0,2015-11-01,2 days,0.75


In [64]:
# Let wide frames render (up to 999 columns) instead of being elided with "...".
pd.options.display.max_columns = 999

In [65]:
# Self-join: match each team-game row (left, suffix _x) with the row of
# its opponent on the same date (right, suffix _y).
newDf = full_stats_data.merge(
    rep_full_stats_data,
    left_on=['date_game', 'opposing_team'],
    right_on=['date_game', 'team_name'],
)

newDf.head(5)


Unnamed: 0,index_x,date_game,game_start_time_x,team_name_x,team_pts_x,opposing_team_x,opposing_team_pts_x,box_score_text_x,overtimes_x,attendance_x,game_remarks_x,FG_x,FGA_x,FG %_x,3P_x,3PA_x,3P%_x,FT_x,FTA_x,FT%_x,ORB_x,DRB_x,TRB_x,AST_x,STL_x,BLK_x,TOV_x,PF_x,Team_win_pct_x,Home Team_x,Avg Pts_x,Avg Opp Pts_x,Avg FG_x,Avg FGA_x,Avg FG %_x,Avg 3P_x,Avg 3PA_x,Avg 3P%_x,Avg FT_x,Avg FTA_x,Avg FT%_x,Avg ORB_x,Avg DRB_x,Avg TRB_x,Avg AST_x,Avg STL_x,Avg BLK_x,Avg TOV_x,Avg PF_x,Previous Game Data_x,Time between games_x,Win_pct_x,index_y,game_start_time_y,team_name_y,team_pts_y,opposing_team_y,opposing_team_pts_y,box_score_text_y,overtimes_y,attendance_y,game_remarks_y,FG_y,FGA_y,FG %_y,3P_y,3PA_y,3P%_y,FT_y,FTA_y,FT%_y,ORB_y,DRB_y,TRB_y,AST_y,STL_y,BLK_y,TOV_y,PF_y,Team_win_pct_y,Home Team_y,Avg Pts_y,Avg Opp Pts_y,Avg FG_y,Avg FGA_y,Avg FG %_y,Avg 3P_y,Avg 3PA_y,Avg 3P%_y,Avg FT_y,Avg FTA_y,Avg FT%_y,Avg ORB_y,Avg DRB_y,Avg TRB_y,Avg AST_y,Avg STL_y,Avg BLK_y,Avg TOV_y,Avg PF_y,Previous Game Data_y,Time between games_y,Win_pct_y
0,1127,2015-10-27,8:00p,Atlanta Hawks,94,Detroit Pistons,106,https://www.basketball-reference.com/boxscores...,,19187,,37,82,0.451,8,27,0.296,12,15,0.8,7,33,40,22,9,4,15,25,0.0,1,,,,,,,,,,,,,,,,,,,,NaT,NaT,,0,8:00p,Detroit Pistons,106,Atlanta Hawks,94,https://www.basketball-reference.com/boxscores...,,19187,,37,96,0.385,12,29,0.414,20,26,0.769,23,36,59,23,5,3,15,15,1.0,0,,,,,,,,,,,,,,,,,,,,NaT,NaT,
1,18,2015-10-29,8:00p,Atlanta Hawks,112,New York Knicks,101,https://www.basketball-reference.com/boxscores...,,19812,,42,83,0.506,10,24,0.417,18,26,0.692,7,32,39,26,11,4,15,18,0.5,0,94.0,106.0,37.0,82.0,0.451,8.0,27.0,0.296,12.0,15.0,0.8,7.0,33.0,40.0,22.0,9.0,4.0,15.0,25.0,2015-10-27,2 days,0.0,1145,8:00p,New York Knicks,101,Atlanta Hawks,112,https://www.basketball-reference.com/boxscores...,,19812,,38,93,0.409,6,29,0.207,19,21,0.905,16,32,48,21,10,4,21,24,0.5,1,122.0,97.0,42.0,93.0,0.452,9.0,23.0,0.391,29.0,35.0,0.829,19.0,30.0,49.0,24.0,11.0,7.0,11.0,28.0,2015-10-28,1 days,1.0
2,1153,2015-10-30,8:00p,Atlanta Hawks,97,Charlotte Hornets,94,https://www.basketball-reference.com/boxscores...,,17024,,36,83,0.434,8,23,0.348,17,22,0.773,8,37,45,23,9,4,15,17,0.666667,1,103.0,103.5,39.5,82.5,0.4785,9.0,25.5,0.3565,15.0,20.5,0.746,7.0,32.5,39.5,24.0,10.0,4.0,15.0,21.5,2015-10-29,1 days,0.5,26,8:00p,Charlotte Hornets,94,Atlanta Hawks,97,https://www.basketball-reference.com/boxscores...,,17024,,36,88,0.409,12,30,0.4,10,13,0.769,13,41,54,23,7,6,18,20,0.0,0,94.0,104.0,33.0,84.0,0.393,6.0,24.0,0.25,22.0,29.0,0.759,11.0,32.0,43.0,16.0,4.0,1.0,8.0,16.0,2015-10-28,2 days,0.0
3,38,2015-11-01,2:00p,Atlanta Hawks,94,Charlotte Hornets,92,https://www.basketball-reference.com/boxscores...,,18691,,37,88,0.42,7,29,0.241,13,14,0.929,9,39,48,22,9,6,11,16,0.75,0,101.0,100.333333,38.333333,82.666667,0.463667,8.666667,24.666667,0.353667,15.666667,21.0,0.755,7.333333,34.0,41.333333,23.666667,9.666667,4.0,15.0,20.0,2015-10-30,2 days,0.666667,1165,2:00p,Charlotte Hornets,92,Atlanta Hawks,94,https://www.basketball-reference.com/boxscores...,,18691,,32,86,0.372,14,37,0.378,14,15,0.933,9,36,45,27,7,13,11,14,0.0,1,94.0,100.5,34.5,86.0,0.401,9.0,27.0,0.325,16.0,21.0,0.764,12.0,36.5,48.5,19.5,5.5,3.5,13.0,18.0,2015-10-30,2 days,0.0
4,54,2015-11-03,7:30p,Atlanta Hawks,98,Miami Heat,92,https://www.basketball-reference.com/boxscores...,,19600,,37,90,0.411,7,21,0.333,17,22,0.773,16,35,51,25,10,1,15,14,0.8,0,99.25,98.25,38.0,84.0,0.45275,8.25,25.75,0.3255,15.0,19.25,0.7985,7.75,35.25,43.0,23.25,9.5,4.5,14.0,19.0,2015-11-01,2 days,0.75,1181,7:30p,Miami Heat,92,Atlanta Hawks,98,https://www.basketball-reference.com/boxscores...,,19600,,38,86,0.442,5,28,0.179,11,16,0.688,13,36,49,13,10,9,18,21,0.5,1,101.666667,95.0,37.0,78.666667,0.47,9.0,21.0,0.431667,18.666667,21.333333,0.874333,6.333333,36.0,42.333333,22.666667,7.0,6.333333,12.666667,20.333333,2015-11-01,2 days,0.666667


In [66]:
# Drop the opponent-side duplicates of the game metadata: every column
# from 'index_y' through 'game_remarks_y' (label slices are inclusive).
opp_meta_cols = newDf.loc[:, 'index_y':'game_remarks_y'].columns
cleandNewDf2 = newDf.drop(opp_meta_cols, axis=1)

In [67]:
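# Disabled: the 'Avg1 *' columns were already dropped before the merge, so there is nothing left for this step to remove.
# cleandNewDf3 = cleandNewDf2.drop(cleandNewDf2.loc[:,'Avg1 Pts_y':'Avg1 PF_y'],axis = 1)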

In [68]:
# Disabled: preview of cleandNewDf3, which is no longer created (its cell is commented out).
# cleandNewDf3.head()

In [69]:
# Drop the team's own box-score stats for the current game ('FG_x'
# through 'PF_x', inclusive); only the pre-game averages are kept.
team_game_stat_cols = cleandNewDf2.loc[:, 'FG_x':'PF_x'].columns
cleandNewDf4 = cleandNewDf2.drop(team_game_stat_cols, axis=1)

In [70]:
# Preview the first five rows after removing the team's per-game stats.
cleandNewDf4.head(5)

Unnamed: 0,index_x,date_game,game_start_time_x,team_name_x,team_pts_x,opposing_team_x,opposing_team_pts_x,box_score_text_x,overtimes_x,attendance_x,game_remarks_x,Team_win_pct_x,Home Team_x,Avg Pts_x,Avg Opp Pts_x,Avg FG_x,Avg FGA_x,Avg FG %_x,Avg 3P_x,Avg 3PA_x,Avg 3P%_x,Avg FT_x,Avg FTA_x,Avg FT%_x,Avg ORB_x,Avg DRB_x,Avg TRB_x,Avg AST_x,Avg STL_x,Avg BLK_x,Avg TOV_x,Avg PF_x,Previous Game Data_x,Time between games_x,Win_pct_x,FG_y,FGA_y,FG %_y,3P_y,3PA_y,3P%_y,FT_y,FTA_y,FT%_y,ORB_y,DRB_y,TRB_y,AST_y,STL_y,BLK_y,TOV_y,PF_y,Team_win_pct_y,Home Team_y,Avg Pts_y,Avg Opp Pts_y,Avg FG_y,Avg FGA_y,Avg FG %_y,Avg 3P_y,Avg 3PA_y,Avg 3P%_y,Avg FT_y,Avg FTA_y,Avg FT%_y,Avg ORB_y,Avg DRB_y,Avg TRB_y,Avg AST_y,Avg STL_y,Avg BLK_y,Avg TOV_y,Avg PF_y,Previous Game Data_y,Time between games_y,Win_pct_y
0,1127,2015-10-27,8:00p,Atlanta Hawks,94,Detroit Pistons,106,https://www.basketball-reference.com/boxscores...,,19187,,0.0,1,,,,,,,,,,,,,,,,,,,,NaT,NaT,,37,96,0.385,12,29,0.414,20,26,0.769,23,36,59,23,5,3,15,15,1.0,0,,,,,,,,,,,,,,,,,,,,NaT,NaT,
1,18,2015-10-29,8:00p,Atlanta Hawks,112,New York Knicks,101,https://www.basketball-reference.com/boxscores...,,19812,,0.5,0,94.0,106.0,37.0,82.0,0.451,8.0,27.0,0.296,12.0,15.0,0.8,7.0,33.0,40.0,22.0,9.0,4.0,15.0,25.0,2015-10-27,2 days,0.0,38,93,0.409,6,29,0.207,19,21,0.905,16,32,48,21,10,4,21,24,0.5,1,122.0,97.0,42.0,93.0,0.452,9.0,23.0,0.391,29.0,35.0,0.829,19.0,30.0,49.0,24.0,11.0,7.0,11.0,28.0,2015-10-28,1 days,1.0
2,1153,2015-10-30,8:00p,Atlanta Hawks,97,Charlotte Hornets,94,https://www.basketball-reference.com/boxscores...,,17024,,0.666667,1,103.0,103.5,39.5,82.5,0.4785,9.0,25.5,0.3565,15.0,20.5,0.746,7.0,32.5,39.5,24.0,10.0,4.0,15.0,21.5,2015-10-29,1 days,0.5,36,88,0.409,12,30,0.4,10,13,0.769,13,41,54,23,7,6,18,20,0.0,0,94.0,104.0,33.0,84.0,0.393,6.0,24.0,0.25,22.0,29.0,0.759,11.0,32.0,43.0,16.0,4.0,1.0,8.0,16.0,2015-10-28,2 days,0.0
3,38,2015-11-01,2:00p,Atlanta Hawks,94,Charlotte Hornets,92,https://www.basketball-reference.com/boxscores...,,18691,,0.75,0,101.0,100.333333,38.333333,82.666667,0.463667,8.666667,24.666667,0.353667,15.666667,21.0,0.755,7.333333,34.0,41.333333,23.666667,9.666667,4.0,15.0,20.0,2015-10-30,2 days,0.666667,32,86,0.372,14,37,0.378,14,15,0.933,9,36,45,27,7,13,11,14,0.0,1,94.0,100.5,34.5,86.0,0.401,9.0,27.0,0.325,16.0,21.0,0.764,12.0,36.5,48.5,19.5,5.5,3.5,13.0,18.0,2015-10-30,2 days,0.0
4,54,2015-11-03,7:30p,Atlanta Hawks,98,Miami Heat,92,https://www.basketball-reference.com/boxscores...,,19600,,0.8,0,99.25,98.25,38.0,84.0,0.45275,8.25,25.75,0.3255,15.0,19.25,0.7985,7.75,35.25,43.0,23.25,9.5,4.5,14.0,19.0,2015-11-01,2 days,0.75,38,86,0.442,5,28,0.179,11,16,0.688,13,36,49,13,10,9,18,21,0.5,1,101.666667,95.0,37.0,78.666667,0.47,9.0,21.0,0.431667,18.666667,21.333333,0.874333,6.333333,36.0,42.333333,22.666667,7.0,6.333333,12.666667,20.333333,2015-11-01,2 days,0.666667


In [71]:
# Drop the opponent's box-score stats for the current game ('FG_y'
# through 'PF_y', inclusive), mirroring what was done for the team side.
opp_game_stat_cols = cleandNewDf4.loc[:, 'FG_y':'PF_y'].columns
cleandNewDf5 = cleandNewDf4.drop(opp_game_stat_cols, axis=1)

In [72]:
# Remove bookkeeping columns that are not carried into the final table.
unused_meta_cols = ['index_x', 'overtimes_x', 'attendance_x']
cleandNewDf = cleandNewDf5.drop(unused_meta_cols, axis='columns')

In [73]:
# Preview the first five rows of the trimmed frame.
cleandNewDf.head(5)

Unnamed: 0,date_game,game_start_time_x,team_name_x,team_pts_x,opposing_team_x,opposing_team_pts_x,box_score_text_x,game_remarks_x,Team_win_pct_x,Home Team_x,Avg Pts_x,Avg Opp Pts_x,Avg FG_x,Avg FGA_x,Avg FG %_x,Avg 3P_x,Avg 3PA_x,Avg 3P%_x,Avg FT_x,Avg FTA_x,Avg FT%_x,Avg ORB_x,Avg DRB_x,Avg TRB_x,Avg AST_x,Avg STL_x,Avg BLK_x,Avg TOV_x,Avg PF_x,Previous Game Data_x,Time between games_x,Win_pct_x,Team_win_pct_y,Home Team_y,Avg Pts_y,Avg Opp Pts_y,Avg FG_y,Avg FGA_y,Avg FG %_y,Avg 3P_y,Avg 3PA_y,Avg 3P%_y,Avg FT_y,Avg FTA_y,Avg FT%_y,Avg ORB_y,Avg DRB_y,Avg TRB_y,Avg AST_y,Avg STL_y,Avg BLK_y,Avg TOV_y,Avg PF_y,Previous Game Data_y,Time between games_y,Win_pct_y
0,2015-10-27,8:00p,Atlanta Hawks,94,Detroit Pistons,106,https://www.basketball-reference.com/boxscores...,,0.0,1,,,,,,,,,,,,,,,,,,,,NaT,NaT,,1.0,0,,,,,,,,,,,,,,,,,,,,NaT,NaT,
1,2015-10-29,8:00p,Atlanta Hawks,112,New York Knicks,101,https://www.basketball-reference.com/boxscores...,,0.5,0,94.0,106.0,37.0,82.0,0.451,8.0,27.0,0.296,12.0,15.0,0.8,7.0,33.0,40.0,22.0,9.0,4.0,15.0,25.0,2015-10-27,2 days,0.0,0.5,1,122.0,97.0,42.0,93.0,0.452,9.0,23.0,0.391,29.0,35.0,0.829,19.0,30.0,49.0,24.0,11.0,7.0,11.0,28.0,2015-10-28,1 days,1.0
2,2015-10-30,8:00p,Atlanta Hawks,97,Charlotte Hornets,94,https://www.basketball-reference.com/boxscores...,,0.666667,1,103.0,103.5,39.5,82.5,0.4785,9.0,25.5,0.3565,15.0,20.5,0.746,7.0,32.5,39.5,24.0,10.0,4.0,15.0,21.5,2015-10-29,1 days,0.5,0.0,0,94.0,104.0,33.0,84.0,0.393,6.0,24.0,0.25,22.0,29.0,0.759,11.0,32.0,43.0,16.0,4.0,1.0,8.0,16.0,2015-10-28,2 days,0.0
3,2015-11-01,2:00p,Atlanta Hawks,94,Charlotte Hornets,92,https://www.basketball-reference.com/boxscores...,,0.75,0,101.0,100.333333,38.333333,82.666667,0.463667,8.666667,24.666667,0.353667,15.666667,21.0,0.755,7.333333,34.0,41.333333,23.666667,9.666667,4.0,15.0,20.0,2015-10-30,2 days,0.666667,0.0,1,94.0,100.5,34.5,86.0,0.401,9.0,27.0,0.325,16.0,21.0,0.764,12.0,36.5,48.5,19.5,5.5,3.5,13.0,18.0,2015-10-30,2 days,0.0
4,2015-11-03,7:30p,Atlanta Hawks,98,Miami Heat,92,https://www.basketball-reference.com/boxscores...,,0.8,0,99.25,98.25,38.0,84.0,0.45275,8.25,25.75,0.3255,15.0,19.25,0.7985,7.75,35.25,43.0,23.25,9.5,4.5,14.0,19.0,2015-11-01,2 days,0.75,0.5,1,101.666667,95.0,37.0,78.666667,0.47,9.0,21.0,0.431667,18.666667,21.333333,0.874333,6.333333,36.0,42.333333,22.666667,7.0,6.333333,12.666667,20.333333,2015-11-01,2 days,0.666667


In [74]:
# Drop the un-shifted win percentages; the shifted 'Win_pct_x' /
# 'Win_pct_y' columns remain.
unshifted_win_pct_cols = ['Team_win_pct_x', 'Team_win_pct_y']
cleandNewDf = cleandNewDf.drop(unshifted_win_pct_cols, axis='columns')

In [75]:
# Drop the "average points allowed" columns from both sides of the join.
avg_points_allowed_cols = ['Avg Opp Pts_x', 'Avg Opp Pts_y']
cleandNewDf = cleandNewDf.drop(avg_points_allowed_cols, axis='columns')

In [76]:
# The joined frame holds one row per team per game, so both rows of a
# game share the same box-score URL. Keep only the first row seen for
# each URL (the default keep='first').
dedupeData = cleandNewDf.drop_duplicates(subset=['box_score_text_x'])

dedupeData.head(5)

Unnamed: 0,date_game,game_start_time_x,team_name_x,team_pts_x,opposing_team_x,opposing_team_pts_x,box_score_text_x,game_remarks_x,Home Team_x,Avg Pts_x,Avg FG_x,Avg FGA_x,Avg FG %_x,Avg 3P_x,Avg 3PA_x,Avg 3P%_x,Avg FT_x,Avg FTA_x,Avg FT%_x,Avg ORB_x,Avg DRB_x,Avg TRB_x,Avg AST_x,Avg STL_x,Avg BLK_x,Avg TOV_x,Avg PF_x,Previous Game Data_x,Time between games_x,Win_pct_x,Home Team_y,Avg Pts_y,Avg FG_y,Avg FGA_y,Avg FG %_y,Avg 3P_y,Avg 3PA_y,Avg 3P%_y,Avg FT_y,Avg FTA_y,Avg FT%_y,Avg ORB_y,Avg DRB_y,Avg TRB_y,Avg AST_y,Avg STL_y,Avg BLK_y,Avg TOV_y,Avg PF_y,Previous Game Data_y,Time between games_y,Win_pct_y
0,2015-10-27,8:00p,Atlanta Hawks,94,Detroit Pistons,106,https://www.basketball-reference.com/boxscores...,,1,,,,,,,,,,,,,,,,,,,NaT,NaT,,0,,,,,,,,,,,,,,,,,,,NaT,NaT,
1,2015-10-29,8:00p,Atlanta Hawks,112,New York Knicks,101,https://www.basketball-reference.com/boxscores...,,0,94.0,37.0,82.0,0.451,8.0,27.0,0.296,12.0,15.0,0.8,7.0,33.0,40.0,22.0,9.0,4.0,15.0,25.0,2015-10-27,2 days,0.0,1,122.0,42.0,93.0,0.452,9.0,23.0,0.391,29.0,35.0,0.829,19.0,30.0,49.0,24.0,11.0,7.0,11.0,28.0,2015-10-28,1 days,1.0
2,2015-10-30,8:00p,Atlanta Hawks,97,Charlotte Hornets,94,https://www.basketball-reference.com/boxscores...,,1,103.0,39.5,82.5,0.4785,9.0,25.5,0.3565,15.0,20.5,0.746,7.0,32.5,39.5,24.0,10.0,4.0,15.0,21.5,2015-10-29,1 days,0.5,0,94.0,33.0,84.0,0.393,6.0,24.0,0.25,22.0,29.0,0.759,11.0,32.0,43.0,16.0,4.0,1.0,8.0,16.0,2015-10-28,2 days,0.0
3,2015-11-01,2:00p,Atlanta Hawks,94,Charlotte Hornets,92,https://www.basketball-reference.com/boxscores...,,0,101.0,38.333333,82.666667,0.463667,8.666667,24.666667,0.353667,15.666667,21.0,0.755,7.333333,34.0,41.333333,23.666667,9.666667,4.0,15.0,20.0,2015-10-30,2 days,0.666667,1,94.0,34.5,86.0,0.401,9.0,27.0,0.325,16.0,21.0,0.764,12.0,36.5,48.5,19.5,5.5,3.5,13.0,18.0,2015-10-30,2 days,0.0
4,2015-11-03,7:30p,Atlanta Hawks,98,Miami Heat,92,https://www.basketball-reference.com/boxscores...,,0,99.25,38.0,84.0,0.45275,8.25,25.75,0.3255,15.0,19.25,0.7985,7.75,35.25,43.0,23.25,9.5,4.5,14.0,19.0,2015-11-01,2 days,0.75,1,101.666667,37.0,78.666667,0.47,9.0,21.0,0.431667,18.666667,21.333333,0.874333,6.333333,36.0,42.333333,22.666667,7.0,6.333333,12.666667,20.333333,2015-11-01,2 days,0.666667


In [77]:
# Sanity check: (rows, columns) of the de-duplicated frame.
dedupeData.shape

(1127, 52)

In [78]:
# Positional rename of all 52 columns: game info first, then the team's
# features, then the opponent's features. The order must match the
# current column order of dedupeData exactly.
game_info_labels = [
    'date_game', 'game_start_time', 'team_name', 'team_pts',
    'opposing_team', 'opposing_team_pts', 'box_score_text',
    'game_remarks',
]
team_feature_labels = [
    'Team Home?', 'Avg Team Pts',
    'Avg Team FG', 'Avg Team FGA', 'Avg Team FG %', 'Avg Team 3P', 'Avg Team 3PA',
    'Avg Team 3P%', 'Avg Team FT', 'Avg Team FTA', 'Avg Team FT%', 'Avg Team ORB',
    'Avg Team DRB', 'Avg Team TRB', 'Avg Team AST', 'Avg Team STL', 'Avg Team BLK',
    'Avg Team TOV', 'Avg Team PF', 'Previous Game Data_x', 'Team Time between previous game',
    'Team_Win_Pct',
]
opp_feature_labels = [
    'Home Team_y', 'Avg Opp Pts',
    'Avg Opp FG', 'Avg Opp FGA', 'Avg Opp FG %', 'Avg Opp 3P', 'Avg Opp 3PA',
    'Avg Opp 3P%', 'Avg Opp FT', 'Avg Opp FTA', 'Avg Opp FT%', 'Avg Opp ORB',
    'Avg Opp DRB', 'Avg Opp TRB', 'Avg Opp AST', 'Avg Opp STL', 'Avg Opp BLK',
    'Avg Opp TOV', 'Avg Opp PF', 'Previous Game Data_y', 'Opp Time between previous game',
    'Opp_Win_Pct',
]
dedupeData.columns = game_info_labels + team_feature_labels + opp_feature_labels

In [79]:
# Drop helper columns that are no longer needed: the raw previous-game
# dates (the time gaps are kept) and the opponent's home flag.
helper_cols = ['Previous Game Data_x', 'Home Team_y', 'Previous Game Data_y']
dedupeCleanDf = dedupeData.drop(helper_cols, axis='columns')

In [80]:
# Preview the first five rows of the cleaned, de-duplicated frame.
dedupeCleanDf.head(5)

Unnamed: 0,date_game,game_start_time,team_name,team_pts,opposing_team,opposing_team_pts,box_score_text,game_remarks,Team Home?,Avg Team Pts,Avg Team FG,Avg Team FGA,Avg Team FG %,Avg Team 3P,Avg Team 3PA,Avg Team 3P%,Avg Team FT,Avg Team FTA,Avg Team FT%,Avg Team ORB,Avg Team DRB,Avg Team TRB,Avg Team AST,Avg Team STL,Avg Team BLK,Avg Team TOV,Avg Team PF,Team Time between previous game,Team_Win_Pct,Avg Opp Pts,Avg Opp FG,Avg Opp FGA,Avg Opp FG %,Avg Opp 3P,Avg Opp 3PA,Avg Opp 3P%,Avg Opp FT,Avg Opp FTA,Avg Opp FT%,Avg Opp ORB,Avg Opp DRB,Avg Opp TRB,Avg Opp AST,Avg Opp STL,Avg Opp BLK,Avg Opp TOV,Avg Opp PF,Opp Time between previous game,Opp_Win_Pct
0,2015-10-27,8:00p,Atlanta Hawks,94,Detroit Pistons,106,https://www.basketball-reference.com/boxscores...,,1,,,,,,,,,,,,,,,,,,,NaT,,,,,,,,,,,,,,,,,,,,NaT,
1,2015-10-29,8:00p,Atlanta Hawks,112,New York Knicks,101,https://www.basketball-reference.com/boxscores...,,0,94.0,37.0,82.0,0.451,8.0,27.0,0.296,12.0,15.0,0.8,7.0,33.0,40.0,22.0,9.0,4.0,15.0,25.0,2 days,0.0,122.0,42.0,93.0,0.452,9.0,23.0,0.391,29.0,35.0,0.829,19.0,30.0,49.0,24.0,11.0,7.0,11.0,28.0,1 days,1.0
2,2015-10-30,8:00p,Atlanta Hawks,97,Charlotte Hornets,94,https://www.basketball-reference.com/boxscores...,,1,103.0,39.5,82.5,0.4785,9.0,25.5,0.3565,15.0,20.5,0.746,7.0,32.5,39.5,24.0,10.0,4.0,15.0,21.5,1 days,0.5,94.0,33.0,84.0,0.393,6.0,24.0,0.25,22.0,29.0,0.759,11.0,32.0,43.0,16.0,4.0,1.0,8.0,16.0,2 days,0.0
3,2015-11-01,2:00p,Atlanta Hawks,94,Charlotte Hornets,92,https://www.basketball-reference.com/boxscores...,,0,101.0,38.333333,82.666667,0.463667,8.666667,24.666667,0.353667,15.666667,21.0,0.755,7.333333,34.0,41.333333,23.666667,9.666667,4.0,15.0,20.0,2 days,0.666667,94.0,34.5,86.0,0.401,9.0,27.0,0.325,16.0,21.0,0.764,12.0,36.5,48.5,19.5,5.5,3.5,13.0,18.0,2 days,0.0
4,2015-11-03,7:30p,Atlanta Hawks,98,Miami Heat,92,https://www.basketball-reference.com/boxscores...,,0,99.25,38.0,84.0,0.45275,8.25,25.75,0.3255,15.0,19.25,0.7985,7.75,35.25,43.0,23.25,9.5,4.5,14.0,19.0,2 days,0.75,101.666667,37.0,78.666667,0.47,9.0,21.0,0.431667,18.666667,21.333333,0.874333,6.333333,36.0,42.333333,22.666667,7.0,6.333333,12.666667,20.333333,2 days,0.666667


In [81]:
# Back-to-back detection from the time elapsed since the previous game.

def _one_day_gap_flag(gap):
    """Return 1 when ``gap`` has a ``.days`` value of exactly 1, else 0.

    Missing gaps (NaT/NaN, i.e. no previous game) yield 0.
    """
    return int(not pd.isnull(gap) and gap.days == 1)

def tb2b(row):
    """Return 1 if the team's previous game was one day earlier, else 0.

    ``row`` must provide the key 'Team Time between previous game'.
    """
    return _one_day_gap_flag(row['Team Time between previous game'])

def ob2b(row):
    """Return 1 if the opponent's previous game was one day earlier, else 0.

    ``row`` must provide the key 'Opp Time between previous game'.
    """
    return _one_day_gap_flag(row['Opp Time between previous game'])
    
# Evaluate the back-to-back checks row by row and store the 0/1 flags.
team_b2b_flags = dedupeCleanDf.apply(tb2b, axis='columns')
dedupeCleanDf['Team B2B?'] = team_b2b_flags
opp_b2b_flags = dedupeCleanDf.apply(ob2b, axis='columns')
dedupeCleanDf['Opp B2B?'] = opp_b2b_flags

In [82]:
# Show only the first rows: with display.max_columns raised to 999,
# rendering the whole frame bloats the notebook without adding information.
dedupeCleanDf.head(10)

Unnamed: 0,date_game,game_start_time,team_name,team_pts,opposing_team,opposing_team_pts,box_score_text,game_remarks,Team Home?,Avg Team Pts,Avg Team FG,Avg Team FGA,Avg Team FG %,Avg Team 3P,Avg Team 3PA,Avg Team 3P%,Avg Team FT,Avg Team FTA,Avg Team FT%,Avg Team ORB,Avg Team DRB,Avg Team TRB,Avg Team AST,Avg Team STL,Avg Team BLK,Avg Team TOV,Avg Team PF,Team Time between previous game,Team_Win_Pct,Avg Opp Pts,Avg Opp FG,Avg Opp FGA,Avg Opp FG %,Avg Opp 3P,Avg Opp 3PA,Avg Opp 3P%,Avg Opp FT,Avg Opp FTA,Avg Opp FT%,Avg Opp ORB,Avg Opp DRB,Avg Opp TRB,Avg Opp AST,Avg Opp STL,Avg Opp BLK,Avg Opp TOV,Avg Opp PF,Opp Time between previous game,Opp_Win_Pct,Team B2B?,Opp B2B?
0,2015-10-27,8:00p,Atlanta Hawks,94,Detroit Pistons,106,https://www.basketball-reference.com/boxscores...,,1,,,,,,,,,,,,,,,,,,,NaT,,,,,,,,,,,,,,,,,,,,NaT,,0,0
1,2015-10-29,8:00p,Atlanta Hawks,112,New York Knicks,101,https://www.basketball-reference.com/boxscores...,,0,94.000000,37.000000,82.000000,0.451000,8.000000,27.000000,0.296000,12.000000,15.000000,0.800000,7.000000,33.000000,40.000000,22.000000,9.000000,4.000000,15.000000,25.000000,2 days,0.000000,122.000000,42.000000,93.000000,0.452000,9.000000,23.000000,0.391000,29.000000,35.000000,0.829000,19.000000,30.000000,49.000000,24.000000,11.000000,7.000000,11.000000,28.000000,1 days,1.000000,0,1
2,2015-10-30,8:00p,Atlanta Hawks,97,Charlotte Hornets,94,https://www.basketball-reference.com/boxscores...,,1,103.000000,39.500000,82.500000,0.478500,9.000000,25.500000,0.356500,15.000000,20.500000,0.746000,7.000000,32.500000,39.500000,24.000000,10.000000,4.000000,15.000000,21.500000,1 days,0.500000,94.000000,33.000000,84.000000,0.393000,6.000000,24.000000,0.250000,22.000000,29.000000,0.759000,11.000000,32.000000,43.000000,16.000000,4.000000,1.000000,8.000000,16.000000,2 days,0.000000,1,0
3,2015-11-01,2:00p,Atlanta Hawks,94,Charlotte Hornets,92,https://www.basketball-reference.com/boxscores...,,0,101.000000,38.333333,82.666667,0.463667,8.666667,24.666667,0.353667,15.666667,21.000000,0.755000,7.333333,34.000000,41.333333,23.666667,9.666667,4.000000,15.000000,20.000000,2 days,0.666667,94.000000,34.500000,86.000000,0.401000,9.000000,27.000000,0.325000,16.000000,21.000000,0.764000,12.000000,36.500000,48.500000,19.500000,5.500000,3.500000,13.000000,18.000000,2 days,0.000000,0,0
4,2015-11-03,7:30p,Atlanta Hawks,98,Miami Heat,92,https://www.basketball-reference.com/boxscores...,,0,99.250000,38.000000,84.000000,0.452750,8.250000,25.750000,0.325500,15.000000,19.250000,0.798500,7.750000,35.250000,43.000000,23.250000,9.500000,4.500000,14.000000,19.000000,2 days,0.750000,101.666667,37.000000,78.666667,0.470000,9.000000,21.000000,0.431667,18.666667,21.333333,0.874333,6.333333,36.000000,42.333333,22.666667,7.000000,6.333333,12.666667,20.333333,2 days,0.666667,0,0
5,2015-11-04,8:00p,Atlanta Hawks,101,Brooklyn Nets,87,https://www.basketball-reference.com/boxscores...,,1,99.000000,37.800000,85.200000,0.444400,8.000000,24.800000,0.327000,15.400000,19.800000,0.793400,9.400000,35.200000,44.600000,23.600000,9.600000,3.800000,14.200000,18.000000,1 days,0.800000,90.500000,36.000000,82.000000,0.439750,3.500000,14.250000,0.215000,15.000000,20.750000,0.701000,9.000000,29.750000,38.750000,19.500000,5.750000,4.500000,14.500000,20.000000,2 days,0.000000,1,0
6,2015-11-06,8:00p,Atlanta Hawks,121,New Orleans Pelicans,115,https://www.basketball-reference.com/boxscores...,,0,99.333333,37.666667,83.666667,0.451500,8.000000,24.333333,0.333167,16.000000,20.500000,0.793167,8.833333,34.000000,42.833333,24.166667,10.500000,4.833333,14.000000,17.833333,2 days,0.833333,100.750000,37.250000,91.000000,0.408500,9.250000,27.250000,0.340750,17.000000,23.250000,0.726250,9.750000,31.750000,41.500000,21.250000,8.000000,4.500000,14.500000,20.000000,3 days,0.000000,0,0
7,2015-11-07,7:30p,Atlanta Hawks,114,Washington Wizards,99,https://www.basketball-reference.com/boxscores...,,1,102.428571,38.142857,82.714286,0.463000,8.142857,23.857143,0.346857,18.000000,22.571429,0.802286,8.428571,34.142857,42.571429,25.000000,10.428571,5.000000,14.142857,18.285714,1 days,0.857143,103.200000,36.800000,83.400000,0.444400,8.200000,23.800000,0.352400,21.400000,27.800000,0.752600,9.600000,31.800000,41.400000,21.800000,9.000000,5.600000,17.800000,22.400000,1 days,0.600000,1,1
8,2015-11-09,8:00p,Atlanta Hawks,107,Minnesota Timberwolves,117,https://www.basketball-reference.com/boxscores...,,1,103.875000,38.750000,83.875000,0.463500,8.750000,25.000000,0.352750,17.625000,21.625000,0.827000,8.625000,33.500000,42.125000,26.500000,10.625000,4.875000,14.125000,17.875000,2 days,0.875000,98.800000,34.200000,82.800000,0.414000,4.200000,14.400000,0.286800,26.200000,33.200000,0.790200,9.000000,37.200000,46.200000,21.200000,7.000000,5.400000,14.200000,24.400000,2 days,0.600000,0,0
9,2015-11-11,8:00p,Atlanta Hawks,106,New Orleans Pelicans,98,https://www.basketball-reference.com/boxscores...,,1,104.222222,39.333333,84.111111,0.468889,9.222222,25.666667,0.360111,16.333333,20.000000,0.830333,8.111111,32.333333,40.444444,26.888889,10.888889,5.000000,14.444444,18.000000,2 days,0.777778,105.142857,39.428571,89.428571,0.441000,10.000000,28.571429,0.348714,16.285714,21.142857,0.772000,8.714286,32.428571,41.142857,23.000000,8.142857,5.142857,13.285714,22.142857,1 days,0.142857,0,1


In [83]:
# Conference lookup for the 2015-16 season, keyed by full team name:
# 1 = Western Conference, 0 = Eastern Conference (15 teams each).

nbaDictwest = {
'Atlanta Hawks': 0,
'Brooklyn Nets': 0,
'Boston Celtics': 0,
'Charlotte Hornets': 0,
'Chicago Bulls': 0,
'Cleveland Cavaliers': 0,
'Dallas Mavericks': 1,
'Denver Nuggets': 1,
'Detroit Pistons': 0,
'Golden State Warriors': 1,
'Houston Rockets': 1,
'Indiana Pacers': 0,
'Los Angeles Clippers': 1,
'Los Angeles Lakers': 1,
'Memphis Grizzlies': 1,
'Miami Heat': 0,
'Milwaukee Bucks': 0,
'Minnesota Timberwolves': 1,
'New Orleans Pelicans': 1,
'New York Knicks': 0,
'Oklahoma City Thunder': 1,
'Orlando Magic': 0,
'Philadelphia 76ers': 0,
'Phoenix Suns': 1,
'Portland Trail Blazers': 1,
'Sacramento Kings': 1,
'San Antonio Spurs': 1,
'Toronto Raptors': 0,
# Utah plays in the Western Conference (it was previously flagged 0).
'Utah Jazz': 1,
'Washington Wizards': 0}

# Look up each side's conference flag (1 = West, 0 = East).
# Mapping the name column directly avoids building a full row object per
# game just to read one cell, and also works on an empty frame. The
# explicit nbaDictwest[...] lookup is kept so an unknown team name still
# raises KeyError instead of silently producing NaN.
dedupeCleanDf['Team West?'] = dedupeCleanDf['team_name'].map(lambda team: nbaDictwest[team])
dedupeCleanDf['Opp West?'] = dedupeCleanDf['opposing_team'].map(lambda team: nbaDictwest[team])

In [84]:
# Show only the first rows: with display.max_columns raised to 999,
# rendering the whole frame bloats the notebook without adding information.
dedupeCleanDf.head(10)

Unnamed: 0,date_game,game_start_time,team_name,team_pts,opposing_team,opposing_team_pts,box_score_text,game_remarks,Team Home?,Avg Team Pts,Avg Team FG,Avg Team FGA,Avg Team FG %,Avg Team 3P,Avg Team 3PA,Avg Team 3P%,Avg Team FT,Avg Team FTA,Avg Team FT%,Avg Team ORB,Avg Team DRB,Avg Team TRB,Avg Team AST,Avg Team STL,Avg Team BLK,Avg Team TOV,Avg Team PF,Team Time between previous game,Team_Win_Pct,Avg Opp Pts,Avg Opp FG,Avg Opp FGA,Avg Opp FG %,Avg Opp 3P,Avg Opp 3PA,Avg Opp 3P%,Avg Opp FT,Avg Opp FTA,Avg Opp FT%,Avg Opp ORB,Avg Opp DRB,Avg Opp TRB,Avg Opp AST,Avg Opp STL,Avg Opp BLK,Avg Opp TOV,Avg Opp PF,Opp Time between previous game,Opp_Win_Pct,Team B2B?,Opp B2B?,Team West?,Opp West?
0,2015-10-27,8:00p,Atlanta Hawks,94,Detroit Pistons,106,https://www.basketball-reference.com/boxscores...,,1,,,,,,,,,,,,,,,,,,,NaT,,,,,,,,,,,,,,,,,,,,NaT,,0,0,0,0
1,2015-10-29,8:00p,Atlanta Hawks,112,New York Knicks,101,https://www.basketball-reference.com/boxscores...,,0,94.000000,37.000000,82.000000,0.451000,8.000000,27.000000,0.296000,12.000000,15.000000,0.800000,7.000000,33.000000,40.000000,22.000000,9.000000,4.000000,15.000000,25.000000,2 days,0.000000,122.000000,42.000000,93.000000,0.452000,9.000000,23.000000,0.391000,29.000000,35.000000,0.829000,19.000000,30.000000,49.000000,24.000000,11.000000,7.000000,11.000000,28.000000,1 days,1.000000,0,1,0,0
2,2015-10-30,8:00p,Atlanta Hawks,97,Charlotte Hornets,94,https://www.basketball-reference.com/boxscores...,,1,103.000000,39.500000,82.500000,0.478500,9.000000,25.500000,0.356500,15.000000,20.500000,0.746000,7.000000,32.500000,39.500000,24.000000,10.000000,4.000000,15.000000,21.500000,1 days,0.500000,94.000000,33.000000,84.000000,0.393000,6.000000,24.000000,0.250000,22.000000,29.000000,0.759000,11.000000,32.000000,43.000000,16.000000,4.000000,1.000000,8.000000,16.000000,2 days,0.000000,1,0,0,0
3,2015-11-01,2:00p,Atlanta Hawks,94,Charlotte Hornets,92,https://www.basketball-reference.com/boxscores...,,0,101.000000,38.333333,82.666667,0.463667,8.666667,24.666667,0.353667,15.666667,21.000000,0.755000,7.333333,34.000000,41.333333,23.666667,9.666667,4.000000,15.000000,20.000000,2 days,0.666667,94.000000,34.500000,86.000000,0.401000,9.000000,27.000000,0.325000,16.000000,21.000000,0.764000,12.000000,36.500000,48.500000,19.500000,5.500000,3.500000,13.000000,18.000000,2 days,0.000000,0,0,0,0
4,2015-11-03,7:30p,Atlanta Hawks,98,Miami Heat,92,https://www.basketball-reference.com/boxscores...,,0,99.250000,38.000000,84.000000,0.452750,8.250000,25.750000,0.325500,15.000000,19.250000,0.798500,7.750000,35.250000,43.000000,23.250000,9.500000,4.500000,14.000000,19.000000,2 days,0.750000,101.666667,37.000000,78.666667,0.470000,9.000000,21.000000,0.431667,18.666667,21.333333,0.874333,6.333333,36.000000,42.333333,22.666667,7.000000,6.333333,12.666667,20.333333,2 days,0.666667,0,0,0,0
5,2015-11-04,8:00p,Atlanta Hawks,101,Brooklyn Nets,87,https://www.basketball-reference.com/boxscores...,,1,99.000000,37.800000,85.200000,0.444400,8.000000,24.800000,0.327000,15.400000,19.800000,0.793400,9.400000,35.200000,44.600000,23.600000,9.600000,3.800000,14.200000,18.000000,1 days,0.800000,90.500000,36.000000,82.000000,0.439750,3.500000,14.250000,0.215000,15.000000,20.750000,0.701000,9.000000,29.750000,38.750000,19.500000,5.750000,4.500000,14.500000,20.000000,2 days,0.000000,1,0,0,0
6,2015-11-06,8:00p,Atlanta Hawks,121,New Orleans Pelicans,115,https://www.basketball-reference.com/boxscores...,,0,99.333333,37.666667,83.666667,0.451500,8.000000,24.333333,0.333167,16.000000,20.500000,0.793167,8.833333,34.000000,42.833333,24.166667,10.500000,4.833333,14.000000,17.833333,2 days,0.833333,100.750000,37.250000,91.000000,0.408500,9.250000,27.250000,0.340750,17.000000,23.250000,0.726250,9.750000,31.750000,41.500000,21.250000,8.000000,4.500000,14.500000,20.000000,3 days,0.000000,0,0,0,1
7,2015-11-07,7:30p,Atlanta Hawks,114,Washington Wizards,99,https://www.basketball-reference.com/boxscores...,,1,102.428571,38.142857,82.714286,0.463000,8.142857,23.857143,0.346857,18.000000,22.571429,0.802286,8.428571,34.142857,42.571429,25.000000,10.428571,5.000000,14.142857,18.285714,1 days,0.857143,103.200000,36.800000,83.400000,0.444400,8.200000,23.800000,0.352400,21.400000,27.800000,0.752600,9.600000,31.800000,41.400000,21.800000,9.000000,5.600000,17.800000,22.400000,1 days,0.600000,1,1,0,0
8,2015-11-09,8:00p,Atlanta Hawks,107,Minnesota Timberwolves,117,https://www.basketball-reference.com/boxscores...,,1,103.875000,38.750000,83.875000,0.463500,8.750000,25.000000,0.352750,17.625000,21.625000,0.827000,8.625000,33.500000,42.125000,26.500000,10.625000,4.875000,14.125000,17.875000,2 days,0.875000,98.800000,34.200000,82.800000,0.414000,4.200000,14.400000,0.286800,26.200000,33.200000,0.790200,9.000000,37.200000,46.200000,21.200000,7.000000,5.400000,14.200000,24.400000,2 days,0.600000,0,0,0,1
9,2015-11-11,8:00p,Atlanta Hawks,106,New Orleans Pelicans,98,https://www.basketball-reference.com/boxscores...,,1,104.222222,39.333333,84.111111,0.468889,9.222222,25.666667,0.360111,16.333333,20.000000,0.830333,8.111111,32.333333,40.444444,26.888889,10.888889,5.000000,14.444444,18.000000,2 days,0.777778,105.142857,39.428571,89.428571,0.441000,10.000000,28.571429,0.348714,16.285714,21.142857,0.772000,8.714286,32.428571,41.142857,23.000000,8.142857,5.142857,13.285714,22.142857,1 days,0.142857,0,1,0,1


In [85]:
# Save the final feature table to disk as CSV.
# NOTE(review): this is an absolute, machine-specific path -- the cell
# fails on any other machine; consider a configurable output directory.
csv_output_path = r'C:\Users\jeromerufin\Desktop\Metis\nba_15_16.csv'  # keep the '.csv' extension at the end of the path
# to_csv returns None when it is given a path, so export_csv holds no data.
export_csv = dedupeCleanDf.to_csv(csv_output_path)
