## Betting Lines Scrape

This notebook's primary function is to scrape the website sportsbookreview.com for gambling lines (both totals and spreads) corresponding to each game in the 2014-2018 NBA seasons. I took the data previously collected from basketball-reference.com, and used the dates when games were played to query the website and collect each day's betting lines. I then put this historical data in a JSON file.

In [None]:
import json
import requests
from bs4 import BeautifulSoup
import time
import csv
import pandas as pd

#### dataframe_loader: 

- Returns a dataframe containing one year's games, built from a list of JSON files that each hold one team's games for the season

In [None]:
def dataframe_loader(years_games):
    """Build one DataFrame from a collection of per-file game JSON dumps.

    Parameters
    ----------
    years_games : iterable of str
        Paths to JSON files. Each file is expected to decode to a list of
        games, where every game is itself an iterable of row records.

    Returns
    -------
    pandas.DataFrame
        One row per record, in file order, then game order, then record
        order within the game.
    """
    team_rows = []
    for json_path in years_games:
        with open(f'{json_path}') as handle:
            season_games = json.load(handle)
        # Flatten one level: every game contributes all of its records.
        for single_game in season_games:
            team_rows.extend(single_game)
    return pd.DataFrame(team_rows)

In [None]:
# Collect the raw JSON file paths for each season by shelling out to `ls`;
# files are matched on their `_<year>.json` filename suffix.
# NOTE: `name = !cmd` is IPython-only capture syntax, so this cell will not
# run as plain Python and relies on a POSIX `ls` being available.
gl_2014 = !ls ../raw_data_files/*_2014.json
gl_2015 = !ls ../raw_data_files/*_2015.json
gl_2016 = !ls ../raw_data_files/*_2016.json
gl_2017 = !ls ../raw_data_files/*_2017.json
gl_2018 = !ls ../raw_data_files/*_2018.json

In [None]:
# Load each season's file list into its own DataFrame, in chronological order.
df_2014, df_2015, df_2016, df_2017, df_2018 = map(
    dataframe_loader,
    (gl_2014, gl_2015, gl_2016, gl_2017, gl_2018),
)



In [None]:
# Stack the five seasons into one frame with a fresh 0..n-1 index.
# pd.concat replaces DataFrame.append, which was deprecated in pandas 1.4 and
# removed in pandas 2.0.
df_all = pd.concat(
    [df_2014, df_2015, df_2016, df_2017, df_2018],
    ignore_index=True,
)

In [None]:
# Derive the game date from the first eight characters of column 0
# (presumably a game id that starts with YYYYMMDD — TODO confirm).
# The vectorised .str accessor avoids a per-row Python lambda and passes
# missing values through as NaN instead of raising.
df_all['date'] = df_all[0].str[:8]

In [None]:
# Preview the first rows to sanity-check the combined frame and its 'date' column.
df_all.head()

In [None]:
# Distinct game dates, in order of first appearance; one scrape request per date.
date_list = list(df_all['date'].unique())

In [None]:
# Number of distinct dates, i.e. how many pages the scrape below will request.
len(date_list)

#### get_betting_lines: 
- Function takes in a date, queries sportsbookreview.com for the corresponding page, and returns the day's betting lines labeled by team

In [None]:
def get_betting_lines(date, timeout=30):
    """Scrape one day's NBA betting lines from sportsbookreview.com.

    Parameters
    ----------
    date : str
        Value inserted verbatim into the ``?date=`` query string
        (presumably ``YYYYMMDD`` — confirm against the site).
    timeout : float, optional
        Seconds to wait for the server before ``requests`` gives up. Without
        a timeout a stalled connection would block the whole scrape forever.

    Returns
    -------
    list of tuple
        ``(date, team_text, line_text)`` triples. Team texts and line texts
        are paired by position and truncated to the shorter of the two
        sequences, so a page with no matching markup yields an empty list.

    Raises
    ------
    requests.exceptions.RequestException
        On connection failures or when ``timeout`` elapses. HTTP error
        statuses are not raised; the returned body is parsed as-is.
    """
    response = requests.get(
        f'https://www.sportsbookreview.com/betting-odds/nba-basketball/merged/?date={date}',
        timeout=timeout,
    )
    # Fixed pause after every request, presumably to avoid hammering the site.
    time.sleep(2)
    betting_page = BeautifulSoup(response.text, 'html.parser')

    teams_list = [
        row.text
        for row in betting_page.find_all('div', {'class': 'eventLine-value'})
    ]

    betting_lines = []
    for item in betting_page.find_all('div', {'class': 'event-holder holder-complete'}):
        # Only the first book container of each completed event is read.
        book = item.find('div', {'class': 'el-div eventLine-book'})
        if book is None:
            # find() returns None when the event has no book container;
            # iterating None would raise TypeError and abort the scrape.
            # NOTE(review): teams and lines are paired purely by position, so
            # a skipped event may shift the pairing for the rest of that day.
            continue
        for line in book:
            # Normalise non-breaking spaces so the stored text is plain ASCII spacing.
            betting_lines.append(line.text.replace('\xa0', ' '))

    return [(date, team, line) for team, line in zip(teams_list, betting_lines)]

In [None]:
# Scrape every date sequentially; each element of all_lines is the list
# returned by get_betting_lines for one date, in date_list order.
# An explicit loop (rather than a comprehension) keeps the partial results in
# all_lines if a request fails part-way through this long-running cell.
all_lines = []
for date in date_list:
    all_lines.append(get_betting_lines(date))
    

In [None]:
# Serialise first, then open the output file: if json.dumps raises, an existing
# all_gambling_lines.json is left untouched instead of being truncated.
serialized_lines = json.dumps(all_lines)
with open('all_gambling_lines.json', 'w') as f:
    f.write(serialized_lines)