## theScore Data Scraper

The following code snippets allow us to gather betting line data at closing for NBA games.

In [1]:
# imports
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np

In [6]:
# constants that affect scrapage
baseURL = 'https://www.thescore.com/nba/events/'
most_recent_game_id = 522292
earliest_game_id = 522225
#The earliest = 507514
# There will be a total of 1080 games this season, 30*72/2; 1230 in a normal season
num_games = most_recent_game_id - earliest_game_id + 1

In [7]:

response_data = []
for game_id in range(most_recent_game_id, earliest_game_id, -1):
    page = requests.get(baseURL + str(game_id))
    soup = BeautifulSoup(page.content, 'html.parser')
    # only true if the game has already finished
    label_text = soup.find_all('div', {'class': 'GameDetailsCard__label--iBMhJ'})
    if not label_text:
        continue
    has_closing_odds = label_text[len(label_text) - 1].contents[0] == 'Closing Odds:'
    if has_closing_odds:
        team_data = soup.find_all('div', {'class': 'Matchup__teamName--vqpde'})
        if not team_data:
            continue
        team_1_abbrv = team_data[0].contents[0].partition(' ')[0]
        team_2_abbrv = team_data[1].contents[0].partition(' ')[0]
        odds = soup.find_all('div', {'class': 'GameDetailsCard__content--2L_KF'})
        if len(odds) > 0:
            odds_string = odds[len(odds) - 1].contents[0]
            odds_partition = odds_string.split()
            if len(odds_partition) < 4:
                continue
            favorite = odds_partition[0]
            spread = odds_partition[1][0:len(odds_partition[1]) - 1]
            ou = odds_partition[3]
            scores = soup.find_all('div', {'class': 'Matchup__teamScore--2BeCA'})
            if not scores:
                continue
            team_1_score = scores[0].contents[0]
            team_2_score = scores[1].contents[0]
            
            stats_page = requests.get(baseURL+str(game_id)+'/stats')
            soup = BeautifulSoup(stats_page.content, 'html.parser')
            players = soup.find_all('div', {'class': 'BoxScore__boxScore--tDnlB'})#BoxScore__statLine--3Daky
            stats = []
            for i in range(0,len(players)):
                for j in range(1, 15):
                    stats.append(players[i].contents[j].contents[0].contents[0].contents[0].contents[0])
                    for k in range(1, len(players[i].contents[j].contents[0].contents[0])):
                        if players[i].contents[j].contents[0].contents[0].contents[k].contents[0].isnumeric():
                            stats.append(int(players[i].contents[j].contents[0].contents[0].contents[k].contents[0]))
                        else:    
                            stats.append(players[i].contents[j].contents[0].contents[0].contents[k].contents[0])
            stats
                        
            responses = [team_1_abbrv, 
                                  team_2_abbrv,
                                  favorite,
                                  spread,
                                  ou,
                                  team_1_score,
                                  team_2_score]
            
            for i in stats:
                responses.append(i)
            response_data.append(responses)


df = pd.DataFrame(response_data,
                  columns = ['team1', 
                             'team2', 
                             'favorite', 
                             'spread', 
                             'over_under', 
                             'team1_score',
                             'team2_score', 'team1p1', 'p1min', 'p1pts', 'p1reb', 'p1ast', 'p1stl', 'p1blk', 'p1pf', 'p1to', 'p1oreb', 'p1dreb', 'p1fg', 'p1fg%', 'p1ft', 'p1ft%', 'p13pt', 'p13pt%', 'p1+/-','team1p2', 'p2min', 'p2pts', 'p2reb', 'p2ast', 'p2stl', 'p2blk', 'p2pf', 'p2to', 'p2oreb', 'p2dreb', 'p2fg', 'p2fg%', 'p2ft', 'p2ft%', 'p23pt', 'p23pt%', 'p2+/-','team1p3', 'p3min', 'p3pts', 'p3reb', 'p3ast', 'p3stl', 'p3blk', 'p3pf', 'p3to', 'p3oreb', 'p3dreb', 'p3fg', 'p3fg%', 'p3ft', 'p3ft%', 'p33pt', 'p33pt%', 'p3+/-','team1p4', 'p4min', 'p4pts', 'p4reb', 'p4ast', 'p4stl', 'p4blk', 'p4pf', 'p4to', 'p4oreb', 'p4dreb', 'p4fg', 'p4fg%', 'p4ft', 'p4ft%', 'p43pt', 'p43pt%', 'p4+/-','team1p5', 'p5min', 'p5pts', 'p5reb', 'p5ast', 'p5stl', 'p5blk', 'p5pf', 'p5to', 'p5oreb', 'p5dreb', 'p5fg', 'p5fg%', 'p5ft', 'p5ft%', 'p53pt', 'p53pt%', 'p5+/-','team1p6', 'p6min', 'p6pts', 'p6reb', 'p6ast', 'p6stl', 'p6blk', 'p6pf', 'p6to', 'p6oreb', 'p6dreb', 'p6fg', 'p6fg%', 'p6ft', 'p6ft%', 'p63pt', 'p63pt%', 'p6+/-','team1p7', 'p7min', 'p7pts', 'p7reb', 'p7ast', 'p7stl', 'p7blk', 'p7pf', 'p7to', 'p7oreb', 'p7dreb', 'p7fg', 'p7fg%', 'p7ft', 'p7ft%', 'p73pt', 'p73pt%', 'p7+/-','team1p8', 'p8min', 'p8pts', 'p8reb', 'p8ast', 'p8stl', 'p8blk', 'p8pf', 'p8to', 'p8oreb', 'p8dreb', 'p8fg', 'p8fg%', 'p8ft', 'p8ft%', 'p83pt', 'p83pt%', 'p8+/-','team1p9', 'p9min', 'p9pts', 'p9reb', 'p9ast', 'p9stl', 'p9blk', 'p9pf', 'p9to', 'p9oreb', 'p9dreb', 'p9fg', 'p9fg%', 'p9ft', 'p9ft%', 'p93pt', 'p93pt%', 'p9+/-','team1p10', 'p10min', 'p10pts', 'p10reb', 'p10ast', 'p10stl', 'p10blk', 'p10pf', 'p10to', 'p10oreb', 'p10dreb', 'p10fg', 'p10fg%', 'p10ft', 'p10ft%', 'p103pt', 'p103pt%', 'p10+/-','team1p11', 'p11min', 'p11pts', 'p11reb', 'p11ast', 'p11stl', 'p11blk', 'p11pf', 'p11to', 'p11oreb', 'p11dreb', 'p11fg', 'p11fg%', 'p11ft', 'p11ft%', 'p113pt', 'p113pt%', 'p11+/-','team1p12', 'p12min', 'p12pts', 'p12reb', 'p12ast', 'p12stl', 'p12blk', 'p12pf', 'p12to', 'p12oreb', 'p12dreb', 'p12fg', 'p12fg%', 'p12ft', 'p12ft%', 'p123pt', 'p123pt%', 'p12+/-','team1p13', 'p13min', 'p13pts', 'p13reb', 'p13ast', 'p13stl', 'p13blk', 'p13pf', 'p13to', 'p13oreb', 'p13dreb', 'p13fg', 'p13fg%', 'p13ft', 'p13ft%', 'p133pt', 'p133pt%', 'p13+/-','team1p14', 'p14min', 'p14pts', 'p14reb', 'p14ast', 'p14stl', 'p14blk', 'p14pf', 'p14to', 'p14oreb', 'p14dreb', 'p14fg', 'p14fg%', 'p14ft', 'p14ft%', 'p143pt', 'p143pt%', 'p14+/-','team2p1', 'p1min', 'p1pts', 'p1reb', 'p1ast', 'p1stl', 'p1blk', 'p1pf', 'p1to', 'p1oreb', 'p1dreb', 'p1fg', 'p1fg%', 'p1ft', 'p1ft%', 'p13pt', 'p13pt%', 'p1+/-','team2p2', 'p2min', 'p2pts', 'p2reb', 'p2ast', 'p2stl', 'p2blk', 'p2pf', 'p2to', 'p2oreb', 'p2dreb', 'p2fg', 'p2fg%', 'p2ft', 'p2ft%', 'p23pt', 'p23pt%', 'p2+/-','team2p3', 'p3min', 'p3pts', 'p3reb', 'p3ast', 'p3stl', 'p3blk', 'p3pf', 'p3to', 'p3oreb', 'p3dreb', 'p3fg', 'p3fg%', 'p3ft', 'p3ft%', 'p33pt', 'p33pt%', 'p3+/-','team2p4', 'p4min', 'p4pts', 'p4reb', 'p4ast', 'p4stl', 'p4blk', 'p4pf', 'p4to', 'p4oreb', 'p4dreb', 'p4fg', 'p4fg%', 'p4ft', 'p4ft%', 'p43pt', 'p43pt%', 'p4+/-','team2p5', 'p5min', 'p5pts', 'p5reb', 'p5ast', 'p5stl', 'p5blk', 'p5pf', 'p5to', 'p5oreb', 'p5dreb', 'p5fg', 'p5fg%', 'p5ft', 'p5ft%', 'p53pt', 'p53pt%', 'p5+/-','team2p6', 'p6min', 'p6pts', 'p6reb', 'p6ast', 'p6stl', 'p6blk', 'p6pf', 'p6to', 'p6oreb', 'p6dreb', 'p6fg', 'p6fg%', 'p6ft', 'p6ft%', 'p63pt', 'p63pt%', 'p6+/-','team2p7', 'p7min', 'p7pts', 'p7reb', 'p7ast', 'p7stl', 'p7blk', 'p7pf', 'p7to', 'p7oreb', 'p7dreb', 'p7fg', 'p7fg%', 'p7ft', 'p7ft%', 'p73pt', 'p73pt%', 'p7+/-','team2p8', 'p8min', 'p8pts', 'p8reb', 'p8ast', 'p8stl', 'p8blk', 'p8pf', 'p8to', 'p8oreb', 'p8dreb', 'p8fg', 'p8fg%', 'p8ft', 'p8ft%', 'p83pt', 'p83pt%', 'p8+/-','team2p9', 'p9min', 'p9pts', 'p9reb', 'p9ast', 'p9stl', 'p9blk', 'p9pf', 'p9to', 'p9oreb', 'p9dreb', 'p9fg', 'p9fg%', 'p9ft', 'p9ft%', 'p93pt', 'p93pt%', 'p9+/-','team2p10', 'p10min', 'p10pts', 'p10reb', 'p10ast', 'p10stl', 'p10blk', 'p10pf', 'p10to', 'p10oreb', 'p10dreb', 'p10fg', 'p10fg%', 'p10ft', 'p10ft%', 'p103pt', 'p103pt%', 'p10+/-','team2p11', 'p11min', 'p11pts', 'p11reb', 'p11ast', 'p11stl', 'p11blk', 'p11pf', 'p11to', 'p11oreb', 'p11dreb', 'p11fg', 'p11fg%', 'p11ft', 'p11ft%', 'p113pt', 'p113pt%', 'p11+/-','team2p12', 'p12min', 'p12pts', 'p12reb', 'p12ast', 'p12stl', 'p12blk', 'p12pf', 'p12to', 'p12oreb', 'p12dreb', 'p12fg', 'p12fg%', 'p12ft', 'p12ft%', 'p123pt', 'p123pt%', 'p12+/-','team2p13', 'p13min', 'p13pts', 'p13reb', 'p13ast', 'p13stl', 'p13blk', 'p13pf', 'p13to', 'p13oreb', 'p13dreb', 'p13fg', 'p13fg%', 'p13ft', 'p13ft%', 'p133pt', 'p133pt%', 'p13+/-','team2p14', 'p14min', 'p14pts', 'p14reb', 'p14ast', 'p14stl', 'p14blk', 'p14pf', 'p14to', 'p14oreb', 'p14dreb', 'p14fg', 'p14fg%', 'p14ft', 'p14ft%', 'p143pt', 'p143pt%', 'p14+/-'])
df.to_csv('append.csv', index = False)
print(df)

   team1 team2 favorite spread over_under team1_score team2_score  \
0    CHA    LA      LAL   -7.5      224.5         105         116   
1     NO   POR      POR   -1.0      239.0          93         101   
2    MIN   PHX      PHX  -11.5      232.0         123         119   
3    ORL    NY      NYK   -4.5      206.5          93          94   
4    OKC   ATL      ATL   -7.5      225.0          93         116   
..   ...   ...      ...    ...        ...         ...         ...   
63   ATL   TOR      ATL   -3.0      226.0         121         120   
64   BOS   BKN      BKN   -2.5      232.5         109         121   
65   DET   CHA      CHA   -4.5      225.0         102         105   
66    SA   DAL      DAL   -4.5      227.0         104         115   
67   WSH   MEM      MEM   -3.5      237.0         112         127   

                team1p1  p1min  p1pts  ...  p14to  p14oreb  p14dreb  p14fg  \
0     Cody Zeller (F-C)     11      4  ...      0        0        0    0/0   
1      Steven A

In [77]:
lines =pd.read_csv('lines.csv', sep=',', thousands=',',encoding='unicode_escape')
append =pd.read_csv('append.csv', sep=',', thousands=',',encoding='unicode_escape')
lines

Unnamed: 0,team1,team2,favorite,spread,over_under,team1_score,team2_score,team1p1,p1min,p1pts,...,p14to.1,p14oreb.1,p14dreb.1,p14fg.1,p14fg%.1,p14ft.1,p14ft%.1,p143pt.1,p143pt%.1,p14+/-.1
0,CHA,LA,LAL,-7.5,224.5,105,116,Cody Zeller (F-C),11,4,...,0,0,0,0/0,-,0/0,-,0/0,-,0
1,NO,POR,POR,-1.0,239.0,93,101,Steven Adams (C),34,6,...,0,0,0,0/0,-,0/0,-,0/0,-,0
2,MIN,PHX,PHX,-11.5,232.0,123,119,K. Towns (C-F),35,41,...,0,0,0,0/0,-,0/0,-,0/0,-,0
3,ORL,NY,NYK,-4.5,206.5,93,94,Nikola Vucevic (C),35,17,...,0,0,0,0/0,-,0/0,-,0/0,-,0
4,OKC,ATL,ATL,-7.5,225.0,93,116,Al Horford (C-F),24,11,...,0,0,2,0/2,.000,0/0,-,0/0,-,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
559,NO,TOR,TOR,-3.5,228.5,113,99,Zion Williamson (F),29,15,...,0,0,0,0/0,-,0/0,-,0/0,-,0
560,NY,IND,IND,-7.5,215.0,107,121,Julius Randle (F-C),35,17,...,0,0,0,0/0,-,0/0,-,0/0,-,0
561,WSH,PHI,PHI,-7.0,230.5,107,113,Deni Avdija (F),28,7,...,0,0,0,0/0,-,0/0,-,0/0,-,0
562,CHA,CLE,CHA,-2.5,216.5,114,121,P.J. Washington (F),21,4,...,0,0,0,0/0,-,0/0,-,0/0,-,0


In [72]:
bro = append.append(lines)
lines.reset_index(drop=True, inplace=True)
lines

Unnamed: 0,team1,team2,favorite,spread,over_under,team1_score,team2_score,team1p1,p1min,p1pts,...,p14to.1,p14oreb.1,p14dreb.1,p14fg.1,p14fg%.1,p14ft.1,p14ft%.1,p143pt.1,p143pt%.1,p14+/-.1
135,DET,NY,NYK,-7.0,206.0,104,114,Mason Plumlee (F-C),27,12,...,0,0,0,0/0,-,0/0,-,0/0,-,0
136,DET,CHI,CHI,-3.0,219.0,102,105,Mason Plumlee (F-C),33,11,...,0,0,0,0/0,-,0/0,-,0/0,-,0
137,DEN,CLE,DEN,-9.5,225.0,120,103,Nikola Jokic (C),32,16,...,0,0,0,0/0,-,0/0,-,0/0,-,0
138,GS,PHX,PHX,-12.5,221.0,98,120,James Wiseman (C),34,11,...,0,0,0,0/0,-,0/0,-,0/0,-,0
139,OKC,MEM,MEM,-6.5,228.0,113,122,Isaiah Roby (F),23,10,...,0,0,0,0/0,-,0/0,-,0/0,-,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
626,NO,TOR,TOR,-3.5,228.5,113,99,Zion Williamson (F),29,15,...,0,0,0,0/0,-,0/0,-,0/0,-,0
627,NY,IND,IND,-7.5,215.0,107,121,Julius Randle (F-C),35,17,...,0,0,0,0/0,-,0/0,-,0/0,-,0
628,WSH,PHI,PHI,-7.0,230.5,107,113,Deni Avdija (F),28,7,...,0,0,0,0/0,-,0/0,-,0/0,-,0
629,CHA,CLE,CHA,-2.5,216.5,114,121,P.J. Washington (F),21,4,...,0,0,0,0/0,-,0/0,-,0/0,-,0


In [75]:
lines

In [None]:
bro.to_csv('append.csv', index = False)
