## theScore Data Scraper

The following code cells scrape closing betting lines (favorite, point spread, and over/under) together with the final scores of NBA games from theScore.

In [55]:
# imports
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np

In [89]:
# Constants that control the scrape.
# URL prefix; a numeric game id is appended to it to form each page URL.
baseURL = 'https://www.thescore.com/nba/events/'
# Game id the scrape starts from; ids are then visited in descending order.
most_recent_game_id = 508039
# How many consecutive ids to request. Ids whose pages lack closing odds are
# skipped, so the resulting table can have fewer rows than this.
num_games = 500

In [96]:

def _first_text(tag):
    """Return the first child of ``tag`` as a plain ``str``.

    Returns ``None`` when the tag has no children or when its first child is
    a nested tag rather than text.
    """
    if not tag.contents:
        return None
    first = tag.contents[0]
    if not isinstance(first, str):
        return None
    # str() detaches the value from the parse tree; a raw NavigableString keeps
    # a reference to the whole document, which would hold every scraped page
    # in memory for as long as the row is kept.
    return str(first)


def _parse_game(soup):
    """Extract one row of closing-line data from a parsed game page.

    Returns ``[team1, team2, favorite, spread, over_under, team1_score,
    team2_score]`` as plain strings, or ``None`` when the page does not carry
    closing odds or is missing any of the expected elements.
    """
    labels = soup.find_all('div', {'class': 'GameDetailsCard__label--iBMhJ'})
    # The last label reads 'Closing Odds:' only if the game has already finished.
    if not labels or _first_text(labels[-1]) != 'Closing Odds:':
        return None

    # Both teams are indexed below, so require two matches rather than "non-empty".
    team_data = soup.find_all('div', {'class': 'Matchup__teamName--vqpde'})
    if len(team_data) < 2:
        return None
    team_1_name = _first_text(team_data[0])
    team_2_name = _first_text(team_data[1])
    if team_1_name is None or team_2_name is None:
        return None
    # Keep only the text before the first space (the abbreviation).
    team_1_abbrv = team_1_name.partition(' ')[0]
    team_2_abbrv = team_2_name.partition(' ')[0]

    odds = soup.find_all('div', {'class': 'GameDetailsCard__content--2L_KF'})
    if not odds:
        return None
    odds_string = _first_text(odds[-1])
    if odds_string is None:
        return None
    odds_partition = odds_string.split()
    if len(odds_partition) < 4:
        return None
    favorite = odds_partition[0]
    # Drop the trailing character of the spread token
    # (presumably a separator such as a comma; confirm against the page markup).
    spread = odds_partition[1][:-1]
    ou = odds_partition[3]

    scores = soup.find_all('div', {'class': 'Matchup__teamScore--2BeCA'})
    if len(scores) < 2:
        return None
    team_1_score = _first_text(scores[0])
    team_2_score = _first_text(scores[1])
    if team_1_score is None or team_2_score is None:
        return None

    return [team_1_abbrv,
            team_2_abbrv,
            favorite,
            spread,
            ou,
            team_1_score,
            team_2_score]


# Seconds to wait for a response; without a timeout a stalled connection
# would block the whole scrape indefinitely.
request_timeout = 10

response_data = []
for game_id in range(most_recent_game_id, most_recent_game_id - num_games, -1):
    try:
        page = requests.get(baseURL + str(game_id), timeout=request_timeout)
    except requests.RequestException as exc:
        # One unreachable page should not discard the rows collected so far.
        print(f'Skipping game {game_id}: {exc}')
        continue
    row = _parse_game(BeautifulSoup(page.content, 'html.parser'))
    if row is not None:
        response_data.append(row)


# Column labels, listed in the same order as the fields of each scraped row.
line_columns = ['team1', 'team2', 'favorite', 'spread', 'over_under',
                'team1_score', 'team2_score']

# Build the table once from the collected rows, save it without the index
# column, and echo it for a quick visual check.
df = pd.DataFrame(response_data, columns=line_columns)
df.to_csv('lines.csv', index=False)
print(df)

    team1 team2 favorite spread over_under team1_score team2_score
0     CHA   POR      POR   -8.0      229.5         111         123
1     BKN    SA      BKN   -4.0      233.5         124         113
2     DEN   CHI      DEN   -4.5      226.5         118         112
3     UTA    NO      UTA   -7.0      237.5         124         129
4     CLE   HOU      HOU   -3.0      216.5         101          90
..    ...   ...      ...    ...        ...         ...         ...
437   MIN    LA      LAL  -10.5      229.0          91         127
438   PHX   SAC      PHX   -3.5      226.0         116         100
439    GS   CHI       GS   -3.5      229.5         129         128
440   BOS   IND      BOS   -3.0      220.5         107         108
441   MIL    NY      MIL  -13.0      222.0         110         130

[442 rows x 7 columns]
