## theScore Data Scraper

The following cells scrape closing betting lines (point spread and over/under) and final scores for NBA games from theScore and save them to `lines.csv`.

In [1]:
# imports
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np

In [4]:
# Constants that control the scrape.
# Event pages are requested at baseURL + <game id>.
baseURL = 'https://www.thescore.com/nba/events/'
# Inclusive bounds of the game-ID range to scrape (IDs are visited from newest to oldest).
most_recent_game_id = 508055
earliest_game_id = 507514
# For reference: there will be a total of 1080 games this season (30 * 72 / 2);
# a normal season has 1230.
# Number of game IDs in the inclusive range [earliest_game_id, most_recent_game_id].
# The "+ 1" counts both endpoints; without it the earliest game ID is never visited.
num_games = most_recent_game_id - earliest_game_id + 1

In [5]:

# Seconds to wait on a single page request before giving up on that game ID.
REQUEST_TIMEOUT_SECONDS = 10


def _first_child_text(tag):
    """Return the first child of ``tag`` as a plain ``str``.

    Returns ``None`` when ``tag`` has no children or its first child is not a
    text node. Converting to a plain ``str`` keeps the scraped values from
    holding a reference to the whole parse tree.
    """
    if not tag.contents:
        return None
    first = tag.contents[0]
    # Text nodes are str subclasses; nested tags are not.
    return str(first) if isinstance(first, str) else None


def _parse_game_row(soup):
    """Build one output row from a parsed game page.

    Returns ``[team1, team2, favorite, spread, over_under, team1_score,
    team2_score]`` (all strings), or ``None`` when the page does not carry
    closing odds or any required element is missing.
    """
    labels = soup.find_all('div', {'class': 'GameDetailsCard__label--iBMhJ'})
    # The last label reads 'Closing Odds:' only once the game has finished.
    if not labels or _first_child_text(labels[-1]) != 'Closing Odds:':
        return None

    teams = soup.find_all('div', {'class': 'Matchup__teamName--vqpde'})
    odds = soup.find_all('div', {'class': 'GameDetailsCard__content--2L_KF'})
    scores = soup.find_all('div', {'class': 'Matchup__teamScore--2BeCA'})
    # Two teams and two scores are indexed below, so require both of each.
    if len(teams) < 2 or not odds or len(scores) < 2:
        return None

    team_names = [_first_child_text(team) for team in teams[:2]]
    final_scores = [_first_child_text(score) for score in scores[:2]]
    odds_string = _first_child_text(odds[-1])
    if None in team_names or None in final_scores or odds_string is None:
        return None

    odds_partition = odds_string.split()
    if len(odds_partition) < 4:
        return None

    # The abbreviation is the text before the first space of the team name.
    team_1_abbrv = team_names[0].partition(' ')[0]
    team_2_abbrv = team_names[1].partition(' ')[0]
    # NOTE: the favorite is copied verbatim from the odds string and may not use
    # the same abbreviation as the team columns (the recorded output has a row
    # with team2 'NO' and favorite 'NOP').
    favorite = odds_partition[0]
    # Drop the last character of the spread token
    # (presumably a trailing comma -- TODO confirm against a live page).
    spread = odds_partition[1][:-1]
    ou = odds_partition[3]
    return [team_1_abbrv,
            team_2_abbrv,
            favorite,
            spread,
            ou,
            final_scores[0],
            final_scores[1]]


response_data = []
failed_game_ids = []
# A single session reuses the connection across the several hundred requests.
with requests.Session() as session:
    # Stop bound is earliest_game_id - 1 so the earliest game itself is included.
    for game_id in range(most_recent_game_id, earliest_game_id - 1, -1):
        try:
            page = session.get(baseURL + str(game_id), timeout=REQUEST_TIMEOUT_SECONDS)
        except requests.RequestException:
            # One unreachable page should not abort the whole scrape; record it and move on.
            failed_game_ids.append(game_id)
            continue
        soup = BeautifulSoup(page.content, 'html.parser')
        row = _parse_game_row(soup)
        if row is not None:
            response_data.append(row)

if failed_game_ids:
    print('Skipped', len(failed_game_ids), 'game IDs after request errors:', failed_game_ids)

df = pd.DataFrame(response_data,
                  columns = ['team1',
                             'team2',
                             'favorite',
                             'spread',
                             'over_under',
                             'team1_score',
                             'team2_score'])
df.to_csv('lines.csv', index = False)
print(df)

    team1 team2 favorite spread over_under team1_score team2_score
0      LA   SAC      SAC   -4.0      223.5         120         123
1      GS   POR       GS   -1.0      232.5         106         108
2     OKC   DAL      DAL   -5.0      221.0          78          87
3     CHI    NO      NOP   -5.5      237.0         128         124
4     CHA   MIN      CHA   -2.5      234.5         135         102
..    ...   ...      ...    ...        ...         ...         ...
474   ATL   CHI      ATL   -2.0      233.0         124         104
475    NO   TOR      TOR   -3.5      228.5         113          99
476    NY   IND      IND   -7.5      215.0         107         121
477   WSH   PHI      PHI   -7.0      230.5         107         113
478   CHA   CLE      CHA   -2.5      216.5         114         121

[479 rows x 7 columns]
